Example #1
    def test_transfer(self):
        """Testing SSH transfer() method"""
        print "Running: %s - %s" % (self.id(), self.shortDescription())

        remote_file = '/etc/hosts'
        remote_file_copy = '/tmp/transfer_test_file'
        host1 = self.hosts[0]
        host2 = self.hosts[1]

        # remove remote test file copy (ignore error if it does not exist)
        g.run(host2, 'rm -f %s' % remote_file_copy)

        # md5sum remote file
        command = 'md5sum %s | awk \'{print $1}\'' % remote_file
        rcode, rout, _ = g.run(self.primary_host, command)
        if rcode == 0:
            md5sum_orig = rout.strip()

        # transfer it
        g.transfer(host1, remote_file, host2, remote_file_copy)

        # md5sum remote file copy
        command = 'md5sum %s | awk \'{print $1}\'' % remote_file_copy
        rcode, rout, _ = g.run(host2, command)
        if rcode == 0:
            md5sum_copy = rout.strip()

        # compare the md5sums
        self.assertEqual(md5sum_orig, md5sum_copy, 'md5sums do not match')
def cmd_run(cmd, hostname, raise_on_error=True):
    """Glusto's command runner wrapper.

    Args:
        cmd (str): Shell command to run on the specified hostname.
        hostname (str): hostname where Glusto should run specified command.
        raise_on_error (bool): defines whether we should raise exception
                               in case command execution failed.
    Returns:
        str: Stripped shell command's stdout value if not None.
    """
    ret, out, err = g.run(hostname, cmd, "root")
    if ("no ssh connection" in err.lower() or
            "tls handshake timeout" in err.lower()):
        g.ssh_close_connection(hostname)
        ret, out, err = g.run(hostname, cmd, "root")
    msg = ("Failed to execute command '%s' on '%s' node. Got non-zero "
           "return code '%s'. Err: %s" % (cmd, hostname, ret, err))
    if int(ret) != 0:
        g.log.error(msg)
    if raise_on_error:
        assert int(ret) == 0, msg

    out = out.strip() if out else out

    return out
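For illustration, a minimal usage sketch of the cmd_run() wrapper above; the hostname and commands are assumptions, not taken from any of the examples here.

# Hypothetical usage of the cmd_run() wrapper defined above.
master = "ocp-master.example.com"  # placeholder hostname

# Raises AssertionError (via the assert) if the command exits non-zero.
uptime = cmd_run("uptime", master)

# With raise_on_error=False a failing command is only logged and the
# (possibly empty) stripped stdout is still returned.
out = cmd_run("test -f /etc/origin/master/master-config.yaml",
              master, raise_on_error=False)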
Example #3
 def test_upload(self):
     """Testing SSH upload() method"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     g.run(self.primary_host, 'rm -f /tmp/upload_test_file')
     rcode, rout, _ = g.run_local('md5sum /etc/hosts | awk \'{print $1}\'')
     if rcode == 0:
         md5sum = rout.strip()
     g.upload(self.primary_host,
              '/etc/hosts', '/tmp/upload_test_file')
     command = 'md5sum /tmp/upload_test_file | awk \'{print $1}\''
     rcode, rout, _ = g.run(self.primary_host, command)
     if rcode == 0:
         md5sum_up = rout.strip()
     self.assertEqual(md5sum, md5sum_up, 'md5sums do not match')
def validate_multipath_pod(hostname, podname, hacount, mpath=""):
    '''
     This function validates multipath for the given app-pod.
     Args:
         hostname (str): OCP master node name
         podname (str): app-pod name for which multipath needs to be
                        validated. ex : nginx1
         hacount (int): multipath count or HA count. ex: 3
         mpath (str): multipath device name to check (optional)
     Returns:
         bool: True if successful,
               otherwise False
    '''
    cmd = "oc get pods -o wide | grep %s | awk '{print $7}'" % podname
    ret, out, err = g.run(hostname, cmd, "root")
    if ret != 0 or out == "":
        g.log.error("failed to exectute cmd %s on %s, err %s"
                    % (cmd, hostname, out))
        return False
    pod_nodename = out.strip()
    active_node_count = 1
    enable_node_count = hacount - 1
    cmd = "multipath -ll %s | grep 'status=active' | wc -l" % mpath
    ret, out, err = g.run(pod_nodename, cmd, "root")
    if ret != 0 or out == "":
        g.log.error("failed to exectute cmd %s on %s, err %s"
                    % (cmd, pod_nodename, out))
        return False
    active_count = int(out.strip())
    if active_node_count != active_count:
        g.log.error("active node count on %s for %s is %s and not 1"
                    % (pod_nodename, podname, active_count))
        return False
    cmd = "multipath -ll %s | grep 'status=enabled' | wc -l" % mpath
    ret, out, err = g.run(pod_nodename, cmd, "root")
    if ret != 0 or out == "":
        g.log.error("failed to exectute cmd %s on %s, err %s"
                    % (cmd, pod_nodename, out))
        return False
    enable_count = int(out.strip())
    if enable_node_count != enable_count:
        g.log.error("passive node count on %s for %s is %s "
                    "and not %s" % (
                        pod_nodename, podname, enable_count,
                        enable_node_count))
        return False

    g.log.info("validation of multipath for %s is successfull"
               % podname)
    return True
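A short, hypothetical call to validate_multipath_pod() above; the master hostname, pod name, HA count and mpath device are illustrative assumptions.

# Hypothetical usage of validate_multipath_pod() defined above.
if not validate_multipath_pod("ocp-master.example.com", "nginx1",
                              hacount=3, mpath="mpatha"):
    g.log.error("multipath validation failed for pod nginx1")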
Example #5
    def test_download(self):
        """Testing SSH download() method"""
        print "Running: %s - %s" % (self.id(), self.shortDescription())

        remote_file = '/etc/hosts'
        local_file = '/tmp/download_test_file'

        # remove local test file (ignore error if it does not exist)
        g.run_local('rm -f %s' % local_file)

        # md5sum remote file
        command = 'md5sum %s | awk \'{print $1}\'' % remote_file
        rcode, rout, _ = g.run(self.primary_host, command)
        if rcode == 0:
            md5sum_up = rout.strip()

        # download it
        g.download(self.primary_host,
                   '/etc/hosts', '/tmp/download_test_file')

        # md5sum local copy
        command = 'md5sum %s | awk \'{print $1}\'' % local_file
        rcode, rout, _ = g.run_local(command)
        if rcode == 0:
            md5sum_down = rout.strip()

        # compare the md5sums
        self.assertEqual(md5sum_down, md5sum_up, 'md5sums do not match')
 def test_stderr(self):
     """Testing output to stderr"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     rcode, rout, rerr = g.run(self.masternode, "uname -a >&2")
     self.assertEqual(rcode, 0)
     self.assertFalse(rout)
     self.assertTrue(rerr)
 def test_return_code(self):
     """Testing the return code"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     rcode, rout, rerr = g.run(self.masternode, "cat /etc/fstab")
     self.assertEqual(rcode, 0)
     self.assertTrue(rout)
     self.assertFalse(rerr)
    def setUpClass(cls):
        """Initialize all the variables necessary for test cases."""
        super(BaseClass, cls).setUpClass()

        # Initializes OCP config variables
        cls.ocp_servers_info = g.config['ocp_servers']
        cls.ocp_master_node = list(g.config['ocp_servers']['master'].keys())
        cls.ocp_master_node_info = g.config['ocp_servers']['master']
        cls.ocp_client = list(g.config['ocp_servers']['client'].keys())
        cls.ocp_client_info = g.config['ocp_servers']['client']
        cls.ocp_nodes = list(g.config['ocp_servers']['nodes'].keys())
        cls.ocp_nodes_info = g.config['ocp_servers']['nodes']

        # Initializes storage project config variables
        openshift_config = g.config.get("cns", g.config.get("openshift"))
        cls.storage_project_name = openshift_config.get(
            'storage_project_name',
            openshift_config.get('setup', {}).get('cns_project_name'))

        # Initializes heketi config variables
        heketi_config = openshift_config['heketi_config']
        cls.heketi_dc_name = heketi_config['heketi_dc_name']
        cls.heketi_service_name = heketi_config['heketi_service_name']
        cls.heketi_client_node = heketi_config['heketi_client_node']
        cls.heketi_server_url = heketi_config['heketi_server_url']
        cls.heketi_cli_user = heketi_config['heketi_cli_user']
        cls.heketi_cli_key = heketi_config['heketi_cli_key']

        cls.gluster_servers = list(g.config['gluster_servers'].keys())
        cls.gluster_servers_info = g.config['gluster_servers']

        cls.storage_classes = openshift_config['dynamic_provisioning'][
            'storage_classes']
        cls.sc = cls.storage_classes.get(
            'storage_class1', cls.storage_classes.get('file_storage_class'))
        cmd = "echo -n %s | base64" % cls.heketi_cli_key
        ret, out, err = g.run(cls.ocp_master_node[0], cmd, "root")
        if ret != 0:
            raise ExecutionError("failed to execute cmd %s on %s out: %s "
                                 "err: %s" % (
                                     cmd, cls.ocp_master_node[0], out, err))
        cls.secret_data_key = out.strip()

        # Checks if heketi server is alive
        if not hello_heketi(cls.heketi_client_node, cls.heketi_server_url):
            raise ConfigError("Heketi server %s is not alive"
                              % cls.heketi_server_url)

        # Switch to the storage project
        if not switch_oc_project(
                cls.ocp_master_node[0], cls.storage_project_name):
            raise ExecutionError("Failed to switch oc project on node %s"
                                 % cls.ocp_master_node[0])

        if 'glustotest_run_id' not in g.config:
            g.config['glustotest_run_id'] = (
                datetime.datetime.now().strftime('%H_%M_%d_%m_%Y'))
        cls.glustotest_run_id = g.config['glustotest_run_id']
        msg = "Setupclass: %s : %s" % (cls.__name__, cls.glustotest_run_id)
        g.log.info(msg)
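setUpClass() above only reads keys out of g.config; a minimal sketch of the config layout it assumes could look like the following (all host names and values are placeholders, not real data).

# Illustrative-only sketch of the g.config structure consumed by
# setUpClass() above; every value below is a placeholder.
example_config = {
    'ocp_servers': {
        'master': {'master1.example.com': {}},
        'client': {'client1.example.com': {}},
        'nodes': {'node1.example.com': {}},
    },
    'gluster_servers': {'gluster1.example.com': {}},
    'openshift': {  # older configs may use the 'cns' key instead
        'storage_project_name': 'glusterfs',
        'heketi_config': {
            'heketi_dc_name': 'heketi-storage',
            'heketi_service_name': 'heketi-storage',
            'heketi_client_node': 'client1.example.com',
            'heketi_server_url': 'http://heketi.example.com:8080',
            'heketi_cli_user': 'admin',
            'heketi_cli_key': 'adminkey',
        },
        'dynamic_provisioning': {
            'storage_classes': {'storage_class1': {}},
        },
    },
}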
Example #9
 def test_run(self):
     """Testing SSH run() method"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     rcode, rout, rerr = g.run(self.primary_host,
                               'echo -n %s' % self.test_string)
     self.assertEqual(rcode, 0)
     self.assertEqual(rout, self.test_string)
     print(rout)
     self.assertEqual(rerr, '')
 def test_stdout(self):
     """Testing output to stdout"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     # add a cleanup method to run after tearDown()
     self.addCleanup(self.cleanup_remote_commands)
     for node in g.config["nodes"]:
         rcode, rout, rerr = g.run(node, "ls -ld /etc")
     self.assertEqual(rcode, 0)
     self.assertTrue(rout)
     self.assertFalse(rerr)
    def _node_reboot(self):
        storage_hostname = (g.config["gluster_servers"]
                            [self.gluster_servers[0]]["storage"])

        cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
        ret, out, err = g.run(storage_hostname, cmd)

        self.addCleanup(self._wait_for_gluster_pod_to_be_ready)

        if ret != 255:
            err_msg = "failed to reboot host %s error: %s" % (
                storage_hostname, err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        try:
            g.ssh_close_connection(storage_hostname)
        except Exception as e:
            g.log.error("failed to close connection with host %s"
                        " with error: %s" % (storage_hostname, e))
            raise

        # added sleep as node will restart after 3 sec
        time.sleep(3)

        for w in Waiter(timeout=600, interval=10):
            try:
                if g.rpyc_get_connection(storage_hostname, user="******"):
                    g.rpyc_close_connection(storage_hostname, user="******")
                    break
            except Exception as err:
                g.log.info("exception while getting connection: '%s'" % err)

        if w.expired:
            error_msg = ("exceeded timeout 600 sec, node '%s' is "
                         "not reachable" % storage_hostname)
            g.log.error(error_msg)
            raise ExecutionError(error_msg)

        # wait for the gluster pod to be in 'Running' state
        self._wait_for_gluster_pod_to_be_ready()

        # glusterd and gluster-blockd service should be up and running
        service_names = ("glusterd", "gluster-blockd", "tcmu-runner")
        for gluster_pod in self.gluster_pod_list:
            for service in service_names:
                g.log.info("gluster_pod - '%s' : gluster_service '%s'" % (
                    gluster_pod, service))
                check_service_status_on_pod(
                    self.oc_node, gluster_pod, service, "running"
                )
Example #12
 def test_stress_stderr(self):
     """Send load of text output to stderr"""
     command = '''ls -Rail /etc > /tmp/railetc
         for i in $(seq 1 1000)
         do
             cat /tmp/railetc >&2
         done
         echo "Complete" >&2
         '''
     g.disable_log_levels('INFO')
     rcode, rout, rerr = g.run(self.primary_host, command)
     g.reset_log_levels()
     self.assertEqual(rcode, 0, 'stressing stderr failed')
     self.assertEqual(rout, '', 'stdout has content.')
     self.assertNotEqual(rerr, '', 'stderr has no content.')
def run(target, command, user=None, log_level=None, orig_run=g.run):
    """Function that runs a command on a host or in a pod via a host.
    Wraps glusto's run function.

    Args:
        target (str|Pod): If target is a str object equal to
            'auto_get_gluster_endpoint', then the Gluster endpoint is
            picked automatically, being any of the Gluster pods or
            nodes depending on the deployment type of the Gluster
            cluster.
            If target is a str object with any other value, it is
            treated as the endpoint on which the command should run.
            If 'target' is of the 'Pod' type,
            then the command will run in the specified pod.
        command (str|list): Command to run.
        user (str|None): user to be passed on to glusto's run method
        log_level (str|None): log level to be passed on to glusto's run method
        orig_run (function): The default implementation of the
            run method. Will be used when target is not a pod.

    Returns:
        A tuple of the command's return code, stdout, and stderr.
    """
    # NOTE: orig_run captures the glusto run method at function
    # definition time in order to capture the method before
    # any additional monkeypatching by other code

    if target == 'auto_get_gluster_endpoint':
        ocp_client_node = list(g.config['ocp_servers']['client'].keys())[0]
        gluster_pods = openshift_ops.get_ocp_gluster_pod_names(ocp_client_node)
        if gluster_pods:
            target = Pod(ocp_client_node, gluster_pods[0])
        else:
            target = list(g.config.get("gluster_servers", {}).keys())[0]

    if isinstance(target, Pod):
        prefix = ['oc', 'rsh', target.podname]
        if isinstance(command, six.string_types):
            cmd = ' '.join(prefix + [command])
        else:
            cmd = prefix + command

        # unpack the tuple to make sure our return value exactly matches
        # our docstring
        return g.run(target.node, cmd, user=user, log_level=log_level)
    else:
        return orig_run(target, command, user=user, log_level=log_level)
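A hedged usage sketch for the run() wrapper above; the host and pod names are placeholders, and Pod is assumed to be the namedtuple-like type used by the wrapper (exposing 'node' and 'podname').

# Hypothetical calls to the run() wrapper defined above.
ret, out, err = run("gluster1.example.com", "gluster volume list")

# Let the wrapper pick a Gluster endpoint (pod or node) automatically.
ret, out, err = run("auto_get_gluster_endpoint", "gluster pool list")

# Run inside a specific pod via 'oc rsh' on its OCP client node.
ret, out, err = run(Pod("client1.example.com", "glusterfs-storage-abc12"),
                    "systemctl status glusterd")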
    def test_glusterblock_logs_presence_verification(self):
        """Validate presence of glusterblock provisioner POD and it's status"""
        gb_prov_cmd = ("oc get pods --all-namespaces "
                       "-l glusterfs=block-%s-provisioner-pod "
                       "-o=custom-columns=:.metadata.name,:.status.phase" % (
                           self.storage_project_name))
        ret, out, err = g.run(self.ocp_client[0], gb_prov_cmd, "root")

        self.assertEqual(ret, 0, "Failed to get Glusterblock provisioner POD.")
        gb_prov_name, gb_prov_status = out.split()
        self.assertEqual(gb_prov_status, 'Running')

        # Create Secret, SC and PVC
        self.create_storage_class()
        self.create_and_wait_for_pvc()

        # Get list of Gluster nodes
        g_hosts = list(g.config.get("gluster_servers", {}).keys())
        self.assertGreater(
            len(g_hosts), 0,
            "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

        # Perform checks on Gluster nodes/PODs
        logs = ("gluster-block-configshell", "gluster-blockd")

        gluster_pods = oc_get_pods(
            self.ocp_client[0], selector="glusterfs-node=pod")
        if gluster_pods:
            cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
        else:
            cmd = "tail -n 5 /var/log/gluster-block/%s.log"
        for g_host in g_hosts:
            for log in logs:
                out = cmd_run_on_gluster_pod_or_node(
                    self.ocp_client[0], cmd % log, gluster_node=g_host)
                self.assertTrue(out, "Command '%s' output is empty." % cmd)
def _get_openshift_version_str(hostname=None):
    """Gets OpenShift version from 'oc version' command.

    Args:
        hostname (str): Node on which the ocp command should run.
    Returns:
        str : oc version, e.g. 'v3.10.47'
    Raises: 'exceptions.ExecutionError' if failed to get version
    """
    if not hostname:
        hostname = list(g.config['ocp_servers']['client'].keys())[0]
    cmd = "oc version | grep openshift | cut -d ' ' -f 2"
    ret, out, err = g.run(hostname, cmd, "root")
    if ret != 0:
        msg = "Failed to get oc version. \n'err': %s\n 'out': %s" % (err, out)
        g.log.error(msg)
        raise AssertionError(msg)
    out = out.strip()
    if not out:
        error_msg = "Empty output from 'oc version' command: '%s'" % out
        g.log.error(error_msg)
        raise exceptions.ExecutionError(error_msg)

    return out
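A minimal, hypothetical call to _get_openshift_version_str() above; the hostname is a placeholder.

# Hypothetical usage of _get_openshift_version_str() defined above.
version_str = _get_openshift_version_str("master1.example.com")
g.log.info("OpenShift version: %s" % version_str)  # e.g. 'v3.10.47'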
 def test_skip_me(self):
     """Testing the unittest skip feature"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     rcode, _, _ = g.run(self.masternode, "cat /etc/hosts")
     self.assertEqual(rcode, 0)
    def test_validate_snaps_256(self):

        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # set config for 256 snaps (to make sure to override)
        cmd_str = ("gluster snapshot config snap-max-hard-limit 256"
                   " --mode=script")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, "Failed to set snap-max-hard-limit to 256.")
        g.log.info("snap-max-hard-limit successfully set to 256.")

        # Create 256 snaps
        for i in range(1, 257, 1):
            cmd_str = "gluster snapshot create %s %s %s" % (
                "snapy%s" % i, self.volname, "no-timestamp")
            ret, _, _ = g.run(self.mnode, cmd_str)
            self.assertEqual(
                ret, 0, ("Failed to create snapshot for %s" % self.volname))
            g.log.info("Snapshot %s created successfully for volume %s" %
                       ("snapy%s" % i, self.volname))

        # Check for no. of snaps using snap_list it should be 256
        snap_list = get_snap_list(self.mnode)
        self.assertTrue((len(snap_list) == 256), "No of snaps not consistent "
                        "for volume %s" % self.volname)
        g.log.info("Successfully validated number of snaps.")

        # Validate names of all 256 snaps created above
        for i in range(1, 257, 1):
            self.assertTrue(("snapy%s" % i in snap_list), "%s snap not "
                            "found " % ("snapy%s" % i))
        g.log.info("Sucessfully validated names of snap")

        # Try to create 257th snapshot
        cmd_str = "gluster snapshot create %s %s %s" % ("snap", self.volname,
                                                        "no-timestamp")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 1, ("Unexpected: Successfully created 'snap'"
                                  " for volume %s" % self.volname))
        g.log.info("Snapshot 'snap' not created as it is the 257th snap")

        # Check for no. of snaps using snap_list it should be 256
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(
            256, len(snap_list), "No of snaps not consistent "
            "for volume %s" % self.volname)
        g.log.info("Successfully validated number of snaps.")
Example #18
    def test_alert_time_out(self):
        """
        Verifying directory quota functionality with respect to
        alert-time, soft-timeout and hard-timeout.

        * Enable quota
        * Set limit on '/'
        * Set default-soft-limit to 50%
        * Set alert time to 1 sec
        * Set soft-timeout to 0 sec
        * Set hard timeout to 0 sec
        * Check quota list
        * Perform some IO such that soft limit is not exceeded
        * Check for alert message in brick logfile (NO alert present)
        * Perform IO and exceed the soft limit
        * Check for alert message in brick logfile (Alert present)
        * Remove some files so that usage falls below soft limit
        * Create some files such that hard limit is exceeded
        * Check for alert message in brick logfile
        * Remove some files so that usage falls below soft limit
        * Check for alert message in brick logfile (NO alert present)

        """

        # pylint: disable=too-many-statements
        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertFalse(ret, "Failed to enable quota on the volume %s"
                         % self.volname)
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Path to set quota limit
        path = "/"

        # Create a directory from mount point
        mount_obj = self.mounts[0]
        mount_dir = mount_obj.mountpoint
        client = mount_obj.client_system

        g.log.info("Creating dir named 'foo' from client %s",
                   client)
        ret = mkdir(client, "%s/foo" % mount_dir)
        self.assertTrue(ret, "Failed to create dir under %s-%s"
                        % (client, mount_dir))
        g.log.info("Directory 'foo' created successfully")

        # Set Quota limit on the root of the volume
        g.log.info("Set Quota Limit on the path %s of the volume %s",
                   path, self.volname)
        ret, _, _ = quota_limit_usage(self.mnode, self.volname,
                                      path=path, limit="100MB")
        self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of "
                                  " the volume %s", path, self.volname))
        g.log.info("Successfully set the Quota limit on %s of the volume %s",
                   path, self.volname)

        # Set default soft limit to 50%
        g.log.info("Set default soft limit:")
        ret, _, _ = quota_set_default_soft_limit(self.mnode, self.volname,
                                                 '50%')
        self.assertEqual(ret, 0, ("Failed to set quota default soft limit"))
        g.log.info("Quota default soft limit set successfully")

        # Check quota list to validate limits
        g.log.info("List all files and directories:")
        ret = quota_validate(self.mnode, self.volname, path=path,
                             soft_limit_percent=50, hard_limit=104857600)
        self.assertTrue(ret, "Failed to validate Quota list")
        g.log.info("Quota List successful")

        # Set alert time to 1 second
        g.log.info("Set quota alert timeout:")
        ret, _, _ = quota_set_alert_time(self.mnode, self.volname, '1sec')
        self.assertEqual(ret, 0, ("Failed to set alert timeout"))
        g.log.info("Quota alert time set successful")

        # Set soft timeout to 0 second
        g.log.info("Set quota soft timeout:")
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, '0sec')
        self.assertEqual(ret, 0, ("Failed to set soft timeout"))
        g.log.info("Quota soft timeout set successful")

        # Set hard timeout to 0 second
        g.log.info("Set quota hard timeout:")
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, '0sec')
        self.assertEqual(ret, 0, ("Failed to set hard timeout"))
        g.log.info("Quota hard timeout set successful")

        # Get the brick log file path for a random node
        bricks = get_all_bricks(self.mnode, self.volname)
        selected_node, brick_path = random.choice(bricks[0:6]).split(':')
        brickpath = brick_path.replace('/', '-')
        brickpathfinal = brickpath[1:]
        brick_log = "/var/log/glusterfs/bricks/%s.log" % brickpathfinal

        # Append unique string to the brick log
        g.log.info("Appending string 'appended_string_1' to the log:")
        append_string_to_file(selected_node, brick_log, "appended_string_1")

        # Starting IO on the mounts without crossing the soft limit
        # file creation should be normal
        g.log.info("Creating Files on %s:%s", client, mount_dir)
        cmd = ("cd %s/foo ; "
               "for i in `seq 1 9` ; "
               "do dd if=/dev/urandom of=file$i "
               "bs=5M "
               "count=1 ; "
               "done" % mount_dir)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, ("Failed to create files on mountpoint"))
        g.log.info("Files created succesfully on mountpoint")

        # Append unique string to the brick log
        g.log.info("Appending string 'appended_string_2' to the log:")
        append_string_to_file(selected_node, brick_log, "appended_string_2")

        # Soft limit not crossed
        # Check if alert message is logged in the brick (shouldn't be logged)
        g.log.info("Check for alert message in logfile:")
        ret = search_pattern_in_file(selected_node, "120004",
                                     brick_log, "appended_string_1",
                                     "appended_string_2")
        self.assertFalse(ret, "Found unnecessary alert in logfile")
        g.log.info("Alert message not seen before crossing soft limit")

        # Continue IO on the mounts to exceed soft limit
        g.log.info("Creating Files on %s:%s", client, mount_dir)
        cmd = ("cd %s/foo ; "
               "for i in `seq 1 200` ; "
               "do dd if=/dev/urandom of=foo$i "
               "bs=100K "
               "count=1 ; "
               "done" % mount_dir)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, ("Failed to create files on mountpoint"))
        g.log.info("Files created succesfully on mountpoint and "
                   "exceed soft limit")

        # Check if quota soft limit exceeded
        g.log.info("Check soft limit:")
        ret = quota_validate(self.mnode, self.volname, path=path,
                             sl_exceeded=True)
        self.assertTrue(ret, "Failed: soft limit not exceeded")
        g.log.info('Quota soft limit exceeded')

        # Inserting sleep of 2 seconds so the alert message gets enough time
        # to be logged
        time.sleep(2)

        # Append unique string to the brick log
        g.log.info("Appending string 'appended_string_3' to the log:")
        append_string_to_file(selected_node, brick_log, "appended_string_3")

        # Check if alert message logged in the brick
        g.log.info("Check for message:")
        ret = search_pattern_in_file(selected_node, "120004",
                                     brick_log, "appended_string_2",
                                     "appended_string_3")
        self.assertTrue(ret, "Alert message not found")
        g.log.info("Pattern Found: got alert message")

        # Append unique string to the brick log
        g.log.info("Appending string 'appended_string_4' to the log:")
        append_string_to_file(selected_node, brick_log, "appended_string_4")

        # Continue IO on the mounts by removing data
        g.log.info("Removing Files on %s:%s", client, mount_dir)
        cmd = ("rm -rfv %s/foo/foo* " % mount_dir)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, ("Failed to delete files on mountpoint"))
        g.log.info("Files removed succesfully from mountpoint and reached "
                   "below soft limit")

        # Check if quota soft limit exceeded
        g.log.info("Check soft limit:")
        ret = quota_validate(self.mnode, self.volname, path=path,
                             sl_exceeded=False)
        self.assertTrue(ret, "Failed: soft limit exceeded")
        g.log.info('Quota soft limit not exceeded')

        # Inserting sleep of 2 seconds so the alert message gets enough time
        # to be logged
        time.sleep(2)

        # Append unique string to the brick log
        g.log.info("Appending string 'appended_string_5' to the log:")
        append_string_to_file(selected_node, brick_log, "appended_string_5")

        # Check if alert message is logged in the brick
        g.log.info("Check for message:")
        ret = search_pattern_in_file(selected_node, "120004",
                                     brick_log, "appended_string_4",
                                     "appended_string_5")
        self.assertFalse(ret, "Found unnecessary alert message in logfile")
        g.log.info("Alert message not seen before crossing soft limit")

        # Continue IO on the mounts to exceed hard limit
        g.log.info("Creating Files on %s:%s", client, mount_dir)
        cmd = ("cd %s/foo ; "
               "for i in `seq 11 20` ; "
               "do dd if=/dev/urandom of=file$i "
               "bs=10M "
               "count=1 ; "
               "done" % mount_dir)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 1, ("Failed: Files created successfully inspite "
                                  "of crossing hard-limit"))
        g.log.info("Files creation stopped on mountpoint once exceeded "
                   "hard limit")

        # Inserting sleep of 2 seconds so the alert message gets enough time
        # to be logged
        time.sleep(2)

        # Append unique string to the brick log
        g.log.info("Appending string 'appended_string_6' to the log:")
        append_string_to_file(selected_node, brick_log, "appended_string_6")

        # Check if alert message is logged in the brick
        g.log.info("Check for message:")
        ret = search_pattern_in_file(selected_node, "120004",
                                     brick_log, "appended_string_5",
                                     "appended_string_6")
        self.assertTrue(ret, "Alert message not seen in logfile")
        g.log.info("Pattern Found: got alert message")

        # Append unique string to the brick log
        g.log.info("Appending string 'Done_with_alert_check_7' to the log:")
        append_string_to_file(selected_node, brick_log,
                              "Done_with_alert_check_7")

        # Continue IO on the mounts by removing data to come below hard limit
        g.log.info("Removing Files on %s:%s", client, mount_dir)
        cmd = ("rm -rfv %s/foo/file{11..20}" % mount_dir)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, ("Failed to delete files on mountpoint"))
        g.log.info("Files removed succesfully on mountpoint and "
                   "reached below soft limit")

        # Inserting sleep of 2 seconds so the alert message gets enough time
        # to be logged
        time.sleep(2)

        # Append unique string to the brick log
        g.log.info("Appending string 'Done_with_alert_check_8' to the log:")
        append_string_to_file(selected_node, brick_log,
                              "Done_with_alert_check_8")

        # Check if alert message is logged in the brick
        g.log.info("Check for message:")
        ret = search_pattern_in_file(selected_node, "120004",
                                     brick_log, "Done_with_alert_check_7",
                                     "Done_with_alert_check_8")
        self.assertFalse(ret, "Found unnecessary alert in logfile")
        g.log.info("EXPECTED: Alert message not seen before crossing "
                   "soft limit")

        # Usage is now below the soft and hard limits
        # check quota list
        g.log.info("List all files and directories:")
        ret = quota_validate(self.mnode, self.volname, path=path,
                             sl_exceeded=False, hl_exceeded=False)
        self.assertTrue(ret, "Failed to validate Quota list with "
                             "soft and hard limits")
        g.log.info("Quota List validated successfully")
Example #19
 def tearDownClass(cls):
     """unittest tearDownClass override"""
     print "Tearing Down Class: %s" % cls.__name__
     g.run(cls.primary_host, 'rm -f /tmp/railetc')
     g.run(cls.primary_host, 'rm -f /tmp/upload_test_file')
     g.run(cls.hosts[1], 'rm -f /tmp/transfer_test_file')
Example #20
def get_remove_brick_status(mnode, volname, bricks_list):
    """Parse the output of 'gluster vol remove-brick status' command
       for the given volume

    Args:
        mnode (str): Node on which command has to be executed.
        volname (str): volume name
        bricks_list (list): List of bricks participating in
        remove-brick operation

    Returns:
        NoneType: None if command execution fails or the output
            cannot be parsed.
        dict: on success, the remove-brick status in dict format.

    Examples:
        >>> get_remove_brick_status('abc.lab.eng.xyz.com', testvol, bricklist)
        {'node': [{'files': '0', 'status': '3', 'lookups': '0', 'skipped': '0'
            , 'nodeName': 'localhost', 'failures': '0', 'runtime': '0.00','id'
            : '6662bdcd-4602-4f2b-ac1a-75e6c85e780c', 'statusStr':
            'completed', 'size': '0'}], 'task-id': '6a135147-b202-4e69-
            b48c-b1c6408b9d24', 'aggregate': {'files': '0', 'status': '3',
                'lookups': '0', 'skipped': '0', 'failures': '0', 'runtime':
                '0.00', 'statusStr': 'completed', 'size': '0'}, 'nodeCount'
            : '3'}

    """

    cmd = ("gluster volume remove-brick %s %s status --xml" %
           (volname, ' '.join(bricks_list)))
    ret, out, _ = g.run(mnode, cmd)
    if ret != 0:
        g.log.error("Failed to execute 'remove-brick status' on node %s",
                    mnode)
        return None

    try:
        root = etree.XML(out)
    except etree.ParseError:
        g.log.error(
            "Failed to parse the remove-brick status"
            "xml output on volume %s", volname)
        return None

    remove_brick_status = {}
    remove_brick_status["node"] = []
    for info in root.findall("volRemoveBrick"):
        for element in info.getchildren():
            if element.tag == "node":
                status_info = {}
                for elmt in element.getchildren():
                    status_info[elmt.tag] = elmt.text
                remove_brick_status[element.tag].append(status_info)
            elif element.tag == "aggregate":
                status_info = {}
                for elmt in element.getchildren():
                    status_info[elmt.tag] = elmt.text
                remove_brick_status[element.tag] = status_info
            else:
                remove_brick_status[element.tag] = element.text
    return remove_brick_status
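A short sketch of how the dict returned by get_remove_brick_status() above might be consumed; the node, volume and brick names are placeholders.

# Hypothetical usage of get_remove_brick_status() defined above.
status = get_remove_brick_status(
    "master1.example.com", "testvol",
    ["gluster1.example.com:/bricks/brick1"])
if status and status.get('aggregate', {}).get('statusStr') == 'completed':
    g.log.info("remove-brick completed on volume testvol")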
Example #21
    def test_auth_allow_ip_fqdn(self):
        """
        Verify auth.allow feature using a combination of client ip and fqdn.
        Steps:
        1. Setup and start volume
        2. Set auth.allow on volume using ip of client1 and hostname of
           client2.
        3. Mount the volume on client1 and client2.
        4. Create directory d1 on client1 mountpoint.
        5. Unmount the volume from client1 and client2.
        6. Set auth.allow on d1 using ip of client1 and hostname of client2.
        7. Mount d1 on client1 and client2.
        8. Unmount d1 from client1 and client2.
        """
        # Obtain hostname of client2
        ret, hostname_client2, _ = g.run(self.mounts[1].client_system,
                                         "hostname")
        self.assertEqual(ret, 0, ("Failed to obtain hostname of client %s"
                                  % self.mounts[1].client_system))
        hostname_client2 = hostname_client2.strip()
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.mounts[1].client_system, hostname_client2)

        # Setting authentication on volume using ip of client1 and hostname of
        # client2.
        auth_dict = {'all': [self.mounts[0].client_system, hostname_client2]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set authentication")
        g.log.info("Successfully set authentication on volume")

        # Mount volume on client1
        self.mount_and_verify(self.mounts[0])

        # Mount volume on client2
        self.mount_and_verify(self.mounts[1])

        g.log.info("Successfully mounted volume on client1 and client2.")

        # Creating directory d1 on mounted volume
        ret = mkdir(self.mounts[0].client_system, "%s/d1"
                    % self.mounts[0].mountpoint)
        self.assertTrue(ret, ("Failed to create directory 'd1' in volume %s "
                              "from client %s"
                              % (self.volname, self.mounts[0].client_system)))

        # Unmount volume from client1.
        ret = self.mounts[0].unmount()
        self.assertTrue(ret, "Failed to unmount volume from client1.")

        # Unmount volume from client2.
        ret = self.mounts[1].unmount()
        self.assertTrue(ret, "Failed to unmount volume from client2.")

        # Setting authentication on d1 using ip of client1 and hostname of
        # client2.
        auth_dict = {'/d1': [self.mounts[0].client_system, hostname_client2]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set authentication")
        g.log.info("Successfully set authentication on volume")

        # Modify GlusterMount objects for mounting sub-directory d1.
        self.mounts[0].volname = "%s/d1" % self.volname
        self.mounts[1].volname = "%s/d1" % self.volname

        # Mount sub-directory d1 on client1
        self.mount_and_verify(self.mounts[0])

        # Mount sub-directory d1 on client2
        self.mount_and_verify(self.mounts[1])

        g.log.info("Successfully mounted sub-dir d1 on client1 and client2.")

        # Unmount sub-directory d1 from client1.
        ret = self.mounts[0].unmount()
        self.assertTrue(ret, "Failed to unmount volume from client1.")

        # Unmount sub-directory d1 from client2.
        ret = self.mounts[1].unmount()
        self.assertTrue(ret, "Failed to unmount volume from client2.")
    def test_volume_set_option_data_self_heal(self):
        """
        - turn off self-heal-daemon option
        - turn off data-self-heal option
        - check if the options are set correctly
        - create IO
        - calculate arequal
        If it is distribute-replicate, the arequal checksum of nodes
        in each replica set should match
        - bring down "brick1"
        - modify IO
        - bring back the brick1
        - execute "find . | xargs stat" from the mount point
        to trigger background data self-heal
        - calculate arequal
        If it is distribute-replicate, the arequal checksum of the brick
        which was down should not match the bricks which were up
        in its replica set, but for the other replica sets where all
        bricks are up the arequal checksums should match
        - check if the data of existing files are not modified in brick1
        - turn on the option data-self-heal
        - execute "find . -type f  | xargs md5sum" from the mount point
        - wait for heal to complete
        - calculate arequal
        If it is distribute-replicate, the arequal checksum of nodes
        in each replica set should match
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches

        all_bricks = get_all_bricks(self.mnode, self.volname)

        # Setting options
        options = {"self-heal-daemon": "off", "data-self-heal": "off"}
        g.log.info('Setting options %s...', options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Check if options are set to off
        options_dict = get_volume_options(self.mnode, self.volname)
        self.assertEqual(options_dict['cluster.self-heal-daemon'], 'off',
                         'Option self-heal-daemon is not set to off')
        self.assertEqual(options_dict['cluster.data-self-heal'], 'off',
                         'Option data-self-heal is not set to off')
        g.log.info('Options are set to off: %s', options)

        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create files
            g.log.info('Creating files and dirs...')
            command = ('cd %s ; '
                       'mkdir test_data_self_heal ;'
                       'cd test_data_self_heal ; '
                       'for i in `seq 1 100` ; '
                       'do dd if=/dev/urandom of=file.$i bs=128K count=$i ; '
                       'done ;' % mount_obj.mountpoint)

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Check arequals
        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume: %s", num_subvols)

        # Get arequals and compare
        for i in range(0, num_subvols):
            # Get arequal for first brick
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            node, brick_path = subvol_brick_list[0].split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            first_brick_total = arequal.splitlines()[-1].split(':')[-1]

            # Get arequal for every brick and compare with first brick
            for brick in subvol_brick_list:
                node, brick_path = brick.split(':')
                command = ('arequal-checksum -p %s '
                           '-i .glusterfs -i .landfill -i .trashcan' %
                           brick_path)
                ret, brick_arequal, _ = g.run(node, command)
                self.assertFalse(ret,
                                 'Failed to get arequal on brick %s' % brick)
                g.log.info('Getting arequal for %s is successful', brick)
                brick_total = brick_arequal.splitlines()[-1].split(':')[-1]

                self.assertEqual(
                    first_brick_total, brick_total,
                    'Arequals for subvol and %s are not equal' % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
        g.log.info('All arequals are equal for distributed-replicated')

        # Select bricks to bring offline
        bricks_to_bring_offline = [get_all_bricks(self.mnode, self.volname)[0]]

        # Get files/dir size
        g.log.info('Getting file/dir list on brick to be offline')
        node, brick_path = bricks_to_bring_offline[0].split(':')
        # Get files/dir list
        command = 'cd %s ; ls' % brick_path
        ret, file_list, _ = g.run(node, command)
        self.assertFalse(ret, 'Failed to ls files on %s' % node)
        brick_file_dir_list = file_list.splitlines()
        # Get files/dir size before bringing brick offline
        g.log.info('Getting file/dir size on brick to be offline')
        brick_file_dir_dict_before_offline = {}
        for file_dir in brick_file_dir_list:
            command = 'cd %s ; du -h %s' % (brick_path, file_dir)
            ret, file_info, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get file size on %s' % node)
            file_size = file_info.split('\t')[0]
            brick_file_dir_dict_before_offline[file_dir] = file_size

        # Bring brick 1 offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Modify data
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Adding data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # changing files
            g.log.info('Creating dirs and files...')
            command = ('cd %s/test_data_self_heal ; '
                       'for i in `seq 1 100` ; '
                       'do dd if=/dev/urandom of=file.$i bs=512K count=$i ; '
                       'done ;' % mount_obj.mountpoint)

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Trigger heal from mount point
        g.log.info('Triggering heal from mount point...')
        for mount_obj in self.mounts:
            g.log.info("Triggering heal for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            command = ('cd %s/test_data_self_heal ; find . | xargs stat' %
                       mount_obj.mountpoint)
            ret, _, _ = g.run(mount_obj.client_system, command)
            self.assertFalse(
                ret, 'Failed to start "find . | xargs stat" '
                'on %s' % mount_obj.client_system)

        # Check arequals
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        subvols = subvols_dict['volume_subvols']

        # Get arequals for first subvol and compare
        first_brick = all_bricks[0]
        node, brick_path = first_brick.split(':')
        command = ('arequal-checksum -p %s '
                   '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
        ret, arequal, _ = g.run(node, command)
        first_brick_total = arequal.splitlines()[-1].split(':')[-1]

        for brick in subvols[0]:
            g.log.info('Getting arequal on bricks %s...', brick)
            node, brick_path = brick.split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get arequal on brick %s' % brick)
            g.log.info('Getting arequal for %s is successful', brick)
            brick_total = arequal.splitlines()[-1].split(':')[-1]

            if brick != first_brick:
                self.assertNotEqual(
                    first_brick_total, brick_total,
                    'Arequals for mountpoint and %s '
                    'are equal' % brick)
                g.log.info('Arequals for mountpoint and %s are not equal',
                           brick)
            else:
                self.assertEqual(
                    first_brick_total, brick_total,
                    'Arequals for mountpoint and %s '
                    'are not equal' % brick)
                g.log.info('Arequals for mountpoint and %s are equal', brick)

        # Get arequals for all subvol except first and compare
        num_subvols = len(subvols_dict['volume_subvols'])
        for i in range(1, num_subvols):
            # Get arequal for first brick
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            node, brick_path = subvol_brick_list[0].split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            first_brick_total = arequal.splitlines()[-1].split(':')[-1]

            # Get arequal for every brick and compare with first brick
            for brick in subvol_brick_list:
                node, brick_path = brick.split(':')
                command = ('arequal-checksum -p %s '
                           '-i .glusterfs -i .landfill -i .trashcan' %
                           brick_path)
                ret, brick_arequal, _ = g.run(node, command)
                self.assertFalse(ret,
                                 'Failed to get arequal on brick %s' % brick)
                g.log.info('Getting arequal for %s is successful', brick)
                brick_total = brick_arequal.splitlines()[-1].split(':')[-1]

                self.assertEqual(
                    first_brick_total, brick_total,
                    'Arequals for subvol and %s are not equal' % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
        g.log.info('All arequals are equal for distributed-replicated')

        # Get files/dir size after bringing brick online
        g.log.info('Getting file/dir size on brick after bringing online')
        brick_file_dir_dict_after_online = {}
        for file_dir in brick_file_dir_list:
            command = 'cd %s ; du -h %s' % (brick_path, file_dir)
            ret, file_info, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get file size on %s' % node)
            file_size = file_info.split('\t')[0]
            brick_file_dir_dict_after_online[file_dir] = file_size

        # Compare dicts with file size
        g.log.info('Compare file/dir size on brick before bringing offline and'
                   ' after bringing online')
        self.assertEqual(
            brick_file_dir_dict_before_offline,
            brick_file_dir_dict_after_online,
            'file/dir size on brick before bringing offline and '
            'after bringing online are not equal')
        g.log.info('file/dir size on brick before bringing offline and '
                   'after bringing online are equal')

        # Setting options
        options = {"data-self-heal": "on"}
        g.log.info('Setting options %s...', options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'data-self-heal' is set to 'on' successfully")

        # Start heal from mount point
        g.log.info('Starting heal from mount point...')
        for mount_obj in self.mounts:
            g.log.info("Start heal for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            command = ('cd %s/test_data_self_heal ; '
                       ' find . | xargs md5sum' % mount_obj.mountpoint)
            _, _, _ = g.run(mount_obj.client_system, command)

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Check arequals
        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume: %s", num_subvols)

        # Get arequals and compare
        for i in range(0, num_subvols):
            # Get arequal for first brick
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            node, brick_path = subvol_brick_list[0].split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            first_brick_total = arequal.splitlines()[-1].split(':')[-1]

            # Get arequal for every brick and compare with first brick
            for brick in subvol_brick_list:
                node, brick_path = brick.split(':')
                command = ('arequal-checksum -p %s '
                           '-i .glusterfs -i .landfill -i .trashcan' %
                           brick_path)
                ret, brick_arequal, _ = g.run(node, command)
                self.assertFalse(ret,
                                 'Failed to get arequal on brick %s' % brick)
                g.log.info('Getting arequal for %s is successful', brick)
                brick_total = brick_arequal.splitlines()[-1].split(':')[-1]

                self.assertEqual(
                    first_brick_total, brick_total,
                    'Arequals for subvol and %s are not equal' % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
        g.log.info('All arequals are equal for distributed-replicated')
    def test_snap_self_heal(self):
        """
        Steps:

        1. create a volume
        2. mount volume
        3. create snapshot of that volume
        4. Activate snapshot
        5. Clone snapshot and Mount
        6. Perform I/O
        7. Bring Down Few bricks from volume without
           affecting the volume or cluster.
        8. Perform I/O
        9. Bring back down bricks to online
        10. Validate heal is complete with arequal

        """
        # pylint: disable=too-many-statements, too-many-locals
        # Creating snapshot:
        g.log.info("Starting to Create snapshot")
        ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
        self.assertEqual(ret, 0, ("Failed to create snapshot for volume %s"
                                  % self.volname))
        g.log.info("Snapshot %s created successfully for volume %s", self.snap,
                   self.volname)

        # Activating snapshot
        g.log.info("Starting to Activate Snapshot")
        ret, _, _ = snap_activate(self.mnode, self.snap)
        self.assertEqual(ret, 0, ("Failed to Activate snapshot %s"
                                  % self.snap))
        g.log.info("Snapshot %s activated successfully", self.snap)

        # snapshot list
        ret, _, _ = snap_list(self.mnode)
        self.assertEqual(ret, 0, ("Failed to list all the snapshot"))
        g.log.info("Snapshot list command was successful")

        # Creating a Clone volume from snapshot:
        g.log.info("Starting to Clone volume from Snapshot")
        ret, _, _ = snap_clone(self.mnode, self.snap, self.clone)
        self.assertEqual(ret, 0, ("Failed to clone %s from snapshot %s"
                                  % (self.clone, self.snap)))
        g.log.info("%s created successfully", self.clone)

        # Start the cloned volume
        g.log.info("Starting the cloned volume")
        ret, _, _ = volume_start(self.mnode, self.clone)
        self.assertEqual(ret, 0, "Failed to start clone %s" % self.clone)
        g.log.info("clone volume %s started successfully", self.clone)

        # Mounting a clone volume
        g.log.info("Mounting a clone volume")
        ret, _, _ = mount_volume(self.clone, self.mount_type, self.mount1,
                                 self.mnode, self.clients[0])
        self.assertEqual(ret, 0, "Failed to mount clone Volume %s"
                         % self.clone)
        g.log.info("Clone volume %s mounted Successfully", self.clone)

        # Checking cloned volume mounted or not
        ret = is_mounted(self.clone, self.mount1, self.mnode,
                         self.clients[0], self.mount_type)
        self.assertTrue(ret, "Failed to mount clone volume on mount point: %s"
                        % self.mount1)
        g.log.info("clone Volume %s mounted on %s", self.clone, self.mount1)

        # write files on all mounts
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mount1)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name file %s" % (
                   self.script_upload_path,
                   self.mount1))
        proc = g.run(self.clients[0], cmd)
        all_mounts_procs.append(proc)
        g.log.info("Successful in creating I/O on mounts")

        # get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.clone)
        bricks_list = get_all_bricks(self.mnode, self.clone)
        g.log.info("Brick List : %s", bricks_list)

        # Select bricks to bring offline
        g.log.info("Starting to bring bricks to offline")
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']

        g.log.info("Brick to bring offline: %s ", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.clone, bricks_to_bring_offline)
        self.assertTrue(ret, "Failed to bring the bricks offline")
        g.log.info("Successful in bringing bricks: %s offline",
                   bricks_to_bring_offline)

        # Offline Bricks list
        offline_bricks = get_offline_bricks_list(self.mnode, self.clone)
        self.assertIsNotNone(offline_bricks, "Failed to get offline brick "
                             "list for volume %s" % self.clone)
        for bricks in offline_bricks:
            self.assertIn(bricks, bricks_to_bring_offline,
                          "Failed to validate "
                          "Bricks offline")
        g.log.info("Bricks Offline: %s", offline_bricks)

        # Online Bricks list
        online_bricks = get_online_bricks_list(self.mnode, self.clone)
        self.assertIsNotNone(online_bricks, "Failed to get online bricks"
                             " for volume %s" % self.clone)
        g.log.info("Bricks Online: %s", online_bricks)

        # write files on mountpoint
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mount1)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name file %s" % (
                   self.script_upload_path,
                   self.mount1))
        proc = g.run(self.clients[0], cmd)
        all_mounts_procs.append(proc)
        g.log.info("Successful in creating I/O on mounts")

        # Bring all bricks online
        g.log.info("bring all bricks online")
        ret = bring_bricks_online(self.mnode, self.clone,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, "Failed to bring bricks online")
        g.log.info("Successful in bringing all bricks online")

        # Validate Bricks are online
        g.log.info("Validating all bricks are online")
        ret = are_bricks_online(self.mnode, self.clone, bricks_list)
        self.assertTrue(ret, "Failed to bring all the bricks online")
        g.log.info("bricks online: %s", bricks_list)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.clone)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online" % self.clone))
        g.log.info("Successful in waiting for volume %s processes to be "
                   "online", self.clone)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.clone)
        self.assertTrue(ret, ("Volume %s : All process are not online"
                              % self.clone))
        g.log.info("Volume %s : All process are online", self.clone)

        # wait for the heal process to complete
        g.log.info("waiting for heal process to complete")
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "Failed to complete the heal process")
        g.log.info("Successfully completed heal process")

        # Check arequal
        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.clone)
        subvols = get_subvols(self.mnode, self.clone)
        num_subvols = len(subvols['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:", num_subvols)

        # Get arequals and compare
        g.log.info("Starting to Compare areequals")
        for i in range(0, num_subvols):
            # Get arequal for first brick
            subvol_brick_list = subvols['volume_subvols'][i]
            node, brick_path = subvol_brick_list[0].split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan'
                       % brick_path)
            ret, arequal, _ = g.run(node, command)
            first_brick_total = arequal.splitlines()[-1].split(':')[-1]

            # Get arequal for every brick and compare with first brick
            for brick in subvol_brick_list:
                node, brick_path = brick.split(':')
                command = ('arequal-checksum -p %s '
                           '-i .glusterfs -i .landfill -i .trashcan'
                           % brick_path)
                ret, brick_arequal, _ = g.run(node, command)
                self.assertFalse(ret,
                                 'Failed to get arequal on brick %s'
                                 % brick)
                g.log.info('Getting arequal for %s is successful', brick)
                brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
                self.assertEqual(first_brick_total, brick_total,
                                 'Arequals for subvol and %s are not equal'
                                 % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
        g.log.info('All arequals are equal for distributed-replicated')
    def test_client_side_quorum_with_auto_option(self):
        """
        Test Script to verify the Client Side Quorum with auto option

        * set cluster.quorum-type to auto.
        * start I/O from the mount point.
        * kill 2 of the brick processes from each replica set
        * perform ops

        """
        # set cluster.quorum-type to auto
        options = {"cluster.quorum-type": "auto"}
        g.log.info("setting cluster.quorum-type to auto on "
                   "volume %s" % self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for"
                              "volume %s" % (options, self.volname)))
        g.log.info("Sucessfully set %s for volume %s"
                   % (options, self.volname))

        # write files on all mounts
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s" % self.mounts)
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_files "
                   "-f 10 --base-file-name file %s" % (self.script_upload_path,
                                                       mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating IO on mounts")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s" % self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:" % num_subvols)

        # bring bricks offline( 2 bricks ) for all the subvolumes
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s"
                       % (i, subvol_brick_list))
            # For volume type: 1 * 2, bring 1 brick offline
            if len(subvol_brick_list) == 2:
                bricks_to_bring_offline = subvol_brick_list[0:1]
            else:
                bricks_to_bring_offline = subvol_brick_list[0:2]
            g.log.info("Going to bring down the brick process "
                       "for %s" % bricks_to_bring_offline)
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                                  "check the log file for more details."))
            g.log.info("Brought down the brick process "
                       "for %s succesfully" % bricks_to_bring_offline)

        # create 2 files named newfile0.txt and newfile1.txt
        g.log.info("Start creating 2 files on all mounts...")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_files "
                   "-f 2 --base-file-name newfile %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with read-only filesystem")
        ret = is_io_procs_fail_with_rofs(self, all_mounts_procs, self.mounts)
        self.assertTrue(ret, ("Unexpected error and IO successfull"
                              " on read-only filesystem"))
        g.log.info("EXPECTED: Read-only file system in IO while creating file")

        # create directory user1
        g.log.info("Start creating directory on all mounts...")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_deep_dir "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with read-only filesystem")
        ret = is_io_procs_fail_with_rofs(self, all_mounts_procs, self.mounts)
        self.assertTrue(ret, ("Unexpected error and IO successfull"
                              " on read-only filesystem"))
        g.log.info("EXPECTED: Read-only file system in IO while"
                   " creating directory")

        # create h/w link to file
        g.log.info("Start creating hard link for file0.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "ln %s/file0.txt %s/file0.txt_hwlink" \
                  % (mount_obj.mountpoint, mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and creating hard link"
                                  " successful on read-only filesystem"))
            self.assertIn("Read-only file system",
                          err, "Read-only filesystem not found in "
                               "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # create s/w link
        g.log.info("Start creating soft link for file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "ln -s %s/file1.txt %s/file1.txt_swlink" %\
                  (mount_obj.mountpoint, mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and creating soft link"
                                  " successful on read-only filesystem"))
            self.assertIn("Read-only file system",
                          err, "Read-only filesystem not found in "
                               "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # append to file
        g.log.info("Appending to file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "cat %s/file0.txt >> %s/file1.txt" %\
                  (mount_obj.mountpoint, mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and append successful"
                                  " on read-only filesystem"))
            self.assertIn("Read-only file system",
                          err, "Read-only filesystem not found in "
                               "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # modify the file
        g.log.info("Modifying file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "echo 'Modify Contents' > %s/file1.txt"\
                  % (mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and modifying successful"
                                  " on read-only filesystem"))
            self.assertIn("Read-only file system",
                          err, "Read-only filesystem not found in "
                               "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # truncate the file
        g.log.info("Truncating file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "truncate -s 0 %s/file1.txt" % (mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and truncating file"
                                  " successful on read-only filesystem"))
            self.assertIn("Read-only file system",
                          err, "Read-only filesystem not found in "
                               "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # read the file
        g.log.info("Starting reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("validating IO on all mounts")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "Reads failed on some of the clients")
        g.log.info("Reads successful on all mounts")

        # stat on file
        g.log.info("stat on file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "stat %s/file1.txt" % (mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, ("Unexpected error and stat on file fails"
                                   " on read-only filesystem"))
            g.log.info("stat on file is successfull on read-only filesystem")

        # stat on dir
        g.log.info("stat on directory on all mounts")
        for mount_obj in self.mounts:
            cmd = ("python %s stat %s"
                   % (self.script_upload_path, mount_obj.mountpoint))
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, ("Unexpected error and stat on directory"
                                   " fails on read-only filesystem"))
            g.log.info("stat on dir is successfull on read-only filesystem")

        # ls on mount point
        g.log.info("ls on mount point on all mounts")
        for mount_obj in self.mounts:
            cmd = ("python %s ls %s"
                   % (self.script_upload_path, mount_obj.mountpoint))
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, ("Unexpected error and listing file fails"
                                   " on read-only filesystem"))
            g.log.info("listing files is successfull on read-only filesystem")
    def test_ec_quorumcount_5(self):
        """
        Test Steps:
        - Write IO's when all bricks are online
        - Get subvol from which bricks to be brought down
        - Set volume disperse quorum count to 5
        - Start writing and reading IO's
        - Bring a brick down,say b1
        - Validate write and read is successful
        - Bring a brick down,say b2
        - Validate write has failed and read is successful
        - Start IO's again while quorum is not met on volume
          write should fail and read should pass
        - Add-brick and log
        - Start Rebalance
        - Wait for rebalance,which should fail as quorum is not met
        - Bring brick online
        - Wait for brick to come online
        - Check if bricks are online
        - Start IO's again when all bricks are online
        - IO's should complete successfully
        - Start IO's again and reset volume
        - Bring down other bricks to max redundancy
        - Validating IO's and waiting to complete
        """

        # pylint: disable=too-many-branches,too-many-statements,too-many-locals

        mountpoint = self.mounts[0].mountpoint
        client1 = self.mounts[0].client_system
        client2 = self.mounts[1].client_system

        # Write IO's  when all bricks are online
        writecmd = ("cd %s; for i in `seq 1 100` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished writes on files successfully')

        # Select a subvol from which bricks to be brought down
        sub_vols = get_subvols(self.mnode, self.volname)
        bricks_list1 = list(choice(sub_vols['volume_subvols']))
        brick_1, brick_2 = sample(bricks_list1, 2)

        # Set volume disperse quorum count to 5
        ret = set_volume_options(self.mnode, self.volname,
                                 {"disperse.quorum-count": "5"})
        self.assertTrue(
            ret, 'Failed to set volume {}'
            ' options'.format(self.volname))
        g.log.info('Successfully set disperse quorum on %s', self.volname)

        # Start writing and reading IO's
        procwrite, procread, count = [], [], 1
        for mount_obj in self.mounts:
            writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                        "--dirname-start-num %d --dir-depth 5 "
                        "--dir-length 10 --max-num-of-dirs 2 "
                        "--num-of-files 15 %s" %
                        (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               writecmd,
                               user=mount_obj.user)
            procwrite.append(proc)
            count += 10

        self.generate_read_cmd(mountpoint, '1', '10')
        ret = g.run_async(client2, self.readcmd)
        procread.append(ret)

        # Brick 1st brick down
        ret = bring_bricks_offline(self.volname, brick_1)
        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1))
        g.log.info('Brick %s is offline successfully', brick_1)

        writecmd = ("cd %s; for i in `seq 101 110` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished writes on files successfully')

        self.generate_read_cmd(mountpoint, '101', '110')
        ret, _, err = g.run(client1, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished reads on files successfully')

        # Brick 2nd brick down
        ret = bring_bricks_offline(self.volname, brick_2)
        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_2))
        g.log.info('Brick %s is offline successfully', brick_2)

        # Validate write has failed and read is successful
        ret = validate_io_procs(procwrite, self.mounts)
        self.assertFalse(
            ret, 'Write successful even after disperse quorum is '
            'not met')
        g.log.info('EXPECTED - Writes failed as disperse quorum is not met')

        ret = validate_io_procs(procread, self.mounts[1])
        self.assertTrue(ret, 'Read operation failed on the client')
        g.log.info('Reads on files successful')

        # Start IO's again while quorum is not met on volume
        procwrite = []
        writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                    "--dirname-start-num 20 --dir-depth 1 "
                    "--dir-length 10 --max-num-of-dirs 1 "
                    "--num-of-files 10 %s" %
                    (self.script_upload_path, mountpoint))
        proc = g.run_async(client1, writecmd)
        procwrite.append(proc)
        ret = validate_io_procs(procwrite, self.mounts[0])
        self.assertFalse(
            ret, 'Write successful even after disperse quorum is '
            'not met')
        g.log.info('EXPECTED - Writes failed as disperse quorum is not met')

        self.generate_read_cmd(mountpoint, '1', '100')
        ret, _, err = g.run(client2, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Reads on files successful')

        # Add brick
        ret = expand_volume(self.mnode,
                            self.volname,
                            self.servers,
                            self.all_servers_info,
                            force=True)
        self.assertTrue(
            ret, ("Failed to expand the volume {}".format(self.volname)))
        g.log.info("Expanding volume %s is successful", self.volname)

        # Log Volume Info and Status after expanding the volume
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume {}".format(self.volname)))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Start Rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ('Rebalance failed on the volume'
                                  ' {}'.format(self.volname)))
        g.log.info('Rebalance has started on volume %s', self.volname)

        # Wait for rebalance to complete
        # Which should also fail as quorum is not met
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=600)
        self.assertFalse(
            ret, "Rebalance passed even though disperse quorum "
            "is not met on volume")
        g.log.info(
            "Expected: Rebalance failed on the volume %s,disperse"
            " quorum is not met", self.volname)

        # Bring brick online
        brick_list = brick_1, brick_2
        ret = bring_bricks_online(self.mnode, self.volname, brick_list)
        self.assertTrue(ret, 'Brick not brought online')
        g.log.info('Brick brought online successfully')

        # Wait for brick to come online
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, 'Bricks are not online')
        g.log.info('EXPECTED : Bricks are online')

        # Check if bricks are online
        ret = get_offline_bricks_list(self.mnode, self.volname)
        self.assertListEqual(ret, [], 'All bricks are not online')
        g.log.info('All bricks are online')

        # Start IO's again when all bricks are online
        writecmd = ("cd %s; for i in `seq 101 200` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)
        self.generate_read_cmd(mountpoint, '101', '120')

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Writes on client %s successful', client1)

        ret, _, err = g.run(client2, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Reads on client %s successful', client2)

        # Start IO's again
        all_mounts_procs, count = [], 30
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 2 "
                   "--dir-length 10 --max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count += 10

        # Reset volume
        ret, _, err = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, err)
        g.log.info('Reset of volume %s successful', self.volname)

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        bricks_to_offline = sample(bricks_list1, 2)
        ret = bring_bricks_offline(self.volname, bricks_to_offline)
        self.assertTrue(ret, 'Redundant bricks not offline')
        g.log.info('Redundant bricks are offline successfully')

        # Validating IO's and waiting to complete
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, 'IO failed on some of the clients')
        g.log.info("Successfully validated all IO's")
    def test_self_heal_symbolic_links(self):
        """
        Test Self-Heal of Symbolic Links (heal command)

        Description:
        - set the volume option
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        "data-self-heal-algorithm": "diff"
        "self-heal-daemon": "off"
        - create IO
        - calculate arequal
        - bring down all brick processes from the selected set
        - calculate arequals and compare with arequal
        before bringing bricks offline
        - modify the data and verify whether the links are properly created
        - calculate arequal before getting bricks online
        - bring bricks online
        - set the volume option
        "self-heal-daemon": "on"
        - check daemons and start healing
        - check if heal is completed
        - check for split-brain
        - calculate arequal after getting bricks online and compare with
        arequal before getting bricks online
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Setting options
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off",
            "self-heal-daemon": "off"
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Options "
                   "'metadata-self-heal', "
                   "'entry-self-heal', "
                   "'data-self-heal', "
                   "'self-heal-daemon' "
                   "are set to 'off' successfully")

        # Creating files on client side
        all_mounts_procs = []
        test_sym_link_self_heal_folder = 'test_sym_link_self_heal'
        g.log.info("Generating data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        # Creating files
        command = ("cd %s/ ; "
                   "mkdir %s ; "
                   "cd %s/ ;"
                   "for i in `seq 1 5` ; "
                   "do mkdir dir.$i ; "
                   "for j in `seq 1 10` ; "
                   "do dd if=/dev/urandom of=dir.$i/file.$j "
                   "bs=1k count=$j ; "
                   "done ; "
                   "done ;" %
                   (self.mounts[0].mountpoint, test_sym_link_self_heal_folder,
                    test_sym_link_self_heal_folder))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Get arequal before getting bricks offline
        g.log.info('Getting arequal before getting bricks offline...')
        ret, result_before_offline = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks offline '
                   'is successful')

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = filter(
            None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                   bricks_to_bring_offline_dict['cold_tier_bricks'] +
                   bricks_to_bring_offline_dict['volume_bricks']))

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Get arequal after getting bricks offline
        g.log.info('Getting arequal after getting bricks offline...')
        ret, result_after_offline = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks offline '
                   'is successful')

        # Checking arequals before bringing bricks offline
        # and after bringing bricks offline
        self.assertItemsEqual(
            result_before_offline, result_after_offline,
            'Checksums before and '
            'after bringing bricks offline are not equal')
        g.log.info('Checksums before and after bringing bricks offline '
                   'are equal')

        # Modify the data
        g.log.info("Modifying data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        # Create symlinks
        g.log.info('Creating symlinks...')
        command = ("cd %s/%s/ ; "
                   "for i in `seq 1 5` ; "
                   "do ln -s dir.$i sym_link_dir.$i ; "
                   "done ;" %
                   (self.mounts[0].mountpoint, test_sym_link_self_heal_folder))
        ret, _, _ = g.run(self.mounts[0].client_system, command)
        self.assertEqual(
            ret, 0,
            'Failed to modify the data for %s...' % self.mounts[0].mountpoint)
        g.log.info('Modifying the data for %s is successful',
                   self.mounts[0].mountpoint)

        # Verify whether the links are properly created
        # Get symlink list
        command = ("cd %s/%s/ ; "
                   "ls |grep 'sym'" %
                   (self.mounts[0].mountpoint, test_sym_link_self_heal_folder))
        _, out, _ = g.run(self.mounts[0].client_system, command)
        symlink_list = out.strip().split('\n')

        # Get folder list
        command = ("cd %s/%s/ ; "
                   "ls |grep -v 'sym'" %
                   (self.mounts[0].mountpoint, test_sym_link_self_heal_folder))
        _, out, _ = g.run(self.mounts[0].client_system, command)
        folder_list = out.strip().split('\n')

        # Compare symlinks and folders
        for symlink in symlink_list:
            symlink_index = symlink_list.index(symlink)
            command = ("cd %s/%s/ ; "
                       "readlink %s" %
                       (self.mounts[0].mountpoint,
                        test_sym_link_self_heal_folder, symlink))
            _, out, _ = g.run(self.mounts[0].client_system, command)
            symlink_to_folder = out.strip()
            self.assertEqual(symlink_to_folder, folder_list[symlink_index],
                             'Links are not properly created')
            g.log.info('Links for %s are properly created',
                       self.mounts[0].mountpoint)

        # Get arequal before getting bricks online
        g.log.info('Getting arequal before getting bricks online...')
        ret, result_before_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks online '
                   'is successful')

        # Bring brick online
        g.log.info('Bringing bricks %s online', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after getting bricks online
        g.log.info('Getting arequal after getting bricks online...')
        ret, result_after_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks online '
                   'is successful')

        # Checking arequals before bringing bricks online
        # and after bringing bricks online
        self.assertItemsEqual(
            result_before_online, result_after_online, 'Checksums before and '
            'after bringing bricks online are not equal')
        g.log.info('Checksums before and after bringing bricks online '
                   'are equal')
Beispiel #27
0
def bring_bricks_online(mnode,
                        volname,
                        bricks_list,
                        bring_bricks_online_methods=None):
    """Bring the bricks specified in the bricks_list online.

    Args:
        mnode (str): Node on which commands will be executed.
        volname (str): Name of the volume.
        bricks_list (list): List of bricks to bring them online.

    Kwargs:
        bring_bricks_online_methods (list): List of methods using which bricks
            will be brought online. The method to bring a brick online is
            randomly selected from the bring_bricks_online_methods list.
            By default all bricks will be brought online with
            ['glusterd_restart', 'volume_start_force'] methods.
            If 'volume_start_force' command is randomly selected then all the
            bricks would be started with the command execution. Hence we break
            from bringing bricks online individually

    Returns:
        bool : True on successfully bringing all bricks online.
            False otherwise
    """
    if bring_bricks_online_methods is None:
        bring_bricks_online_methods = [
            'glusterd_restart', 'volume_start_force'
        ]
    elif isinstance(bring_bricks_online_methods, str):
        bring_bricks_online_methods = [bring_bricks_online_methods]

    g.log.info("Bringing bricks '%s' online with '%s'", bricks_list,
               bring_bricks_online_methods)

    _rc = True
    failed_to_bring_online_list = []
    for brick in bricks_list:
        bring_brick_online_method = random.choice(bring_bricks_online_methods)
        if is_brick_mux_enabled(mnode):
            bring_bricks_online_command = ("gluster volume start %s force" %
                                           volname)
            ret, _, _ = g.run(mnode, bring_bricks_online_command)
            if ret != 0:
                g.log.error("Unable to start the volume %s with force option",
                            volname)
                _rc = False
            else:
                g.log.info(
                    "Successfully restarted volume %s to bring all "
                    "the bricks '%s' online", volname, bricks_list)

        elif bring_brick_online_method == 'glusterd_restart':
            bring_brick_online_command = "service glusterd restart"
            brick_node, _ = brick.split(":")
            ret, _, _ = g.run(brick_node, bring_brick_online_command)
            if ret != 0:
                g.log.error("Unable to restart glusterd on node %s",
                            brick_node)
                _rc = False
                failed_to_bring_online_list.append(brick)
            else:
                g.log.info(
                    "Successfully restarted glusterd on node %s to "
                    "bring back brick %s online", brick_node, brick)

        elif bring_brick_online_method == 'volume_start_force':
            bring_brick_online_command = ("gluster volume start %s force" %
                                          volname)
            ret, _, _ = g.run(mnode, bring_brick_online_command)
            if ret != 0:
                g.log.error("Unable to start the volume %s with force option",
                            volname)
                _rc = False
            else:
                g.log.info(
                    "Successfully restarted volume %s to bring all "
                    "the bricks '%s' online", volname, bricks_list)
                break
        else:
            g.log.error("Invalid method '%s' to bring brick online",
                        bring_brick_online_method)
            return False

    g.log.info("Waiting for 30 seconds for all the bricks to be online")
    time.sleep(30)
    return _rc
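
# --- Illustrative usage sketch (not part of the library code above) ---
# Bricks are passed as 'host:/brick/path' strings; a single method can be
# forced instead of the random default. Host names and paths below are
# placeholders.
def _example_bring_online():
    bricks = ['server1:/bricks/brick1/vol0', 'server2:/bricks/brick2/vol0']
    if not bring_bricks_online('master.example.com', 'vol0', bricks,
                               bring_bricks_online_methods=[
                                   'volume_start_force']):
        g.log.error("Could not bring bricks %s online", bricks)
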
Beispiel #28
0
def bring_bricks_offline(volname,
                         bricks_list,
                         bring_bricks_offline_methods=None):
    """Bring the bricks specified in the bricks_list offline.

    Args:
        volname (str): Name of the volume
        bricks_list (list): List of bricks to bring them offline.

    Kwargs:
        bring_bricks_offline_methods (list): List of methods using which bricks
            will be brought offline. The method to bring a brick offline is
            randomly selected from the bring_bricks_offline_methods list.
            By default all bricks will be brought offline with
            'service_kill' method.

    Returns:
        bool : True on successfully bringing all bricks offline.
               False otherwise
    """
    if bring_bricks_offline_methods is None:
        bring_bricks_offline_methods = ['service_kill']
    elif isinstance(bring_bricks_offline_methods, str):
        bring_bricks_offline_methods = [bring_bricks_offline_methods]

    if isinstance(bricks_list, str):
        bricks_list = [bricks_list]

    node_list = []
    for brick in bricks_list:
        node, _ = brick.split(":")
        node_list.append(node)

    if is_brick_mux_enabled(node_list[0]):
        _rc = True
        failed_to_bring_offline_list = []
        for brick in bricks_list:
            brick_node, brick_path = brick.split(":")
            cmd = ("pgrep glusterfsd")
            _, out, _ = g.run(brick_node, cmd)
            if len(out.split()) > 1:
                cmd = ("ps -eaf | grep glusterfsd | "
                       " grep %s.%s | grep -o '/var/run/gluster/.*' | "
                       " awk '{ print $3 }' | grep -v 'awk' " %
                       (volname, brick_node))
            else:
                cmd = ("ps -eaf | grep glusterfsd | "
                       "grep -o '/var/run/gluster.*' | "
                       " awk '{ print $3 }' | grep -v 'awk'")
            _, socket_path, _ = g.run(brick_node, cmd)
            uds_path = socket_path.strip()
            kill_cmd = ("gf_attach -d %s %s" % (uds_path, brick_path))
            ret, _, _ = g.run(brick_node, kill_cmd)
            if ret != 0:
                g.log.error("Unable to kill the brick %s", brick)
                failed_to_bring_offline_list.append(brick)
                _rc = False

        if not _rc:
            g.log.error("Unable to bring some of the bricks %s offline",
                        failed_to_bring_offline_list)
            return False

        g.log.info("All the bricks : %s are brought offline", bricks_list)
        return True

    _rc = True
    failed_to_bring_offline_list = []
    for brick in bricks_list:
        bring_brick_offline_method = (
            random.choice(bring_bricks_offline_methods))
        if bring_brick_offline_method == 'service_kill':
            brick_node, brick_path = brick.split(":")
            brick_path = brick_path.replace("/", "-")
            kill_cmd = ("pid=`ps -ef | grep -ve 'grep' | "
                        "grep -e '%s%s.pid' | awk '{print $2}'` && "
                        "kill -15 $pid || kill -9 $pid" %
                        (brick_node, brick_path))
            ret, _, _ = g.run(brick_node, kill_cmd)
            if ret != 0:
                g.log.error("Unable to kill the brick %s", brick)
                failed_to_bring_offline_list.append(brick)
                _rc = False
        else:
            g.log.error("Invalid method '%s' to bring brick offline",
                        bring_brick_offline_method)
            return False

    if not _rc:
        g.log.error("Unable to bring some of the bricks %s offline",
                    failed_to_bring_offline_list)
        return False

    g.log.info("All the bricks : %s are brought offline", bricks_list)
    return True
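
# --- Illustrative usage sketch (not part of the library code above) ---
# A single brick may be passed as a plain string and is wrapped into a list
# internally. Host name and path below are placeholders.
def _example_bring_offline():
    if bring_bricks_offline('vol0', 'server1:/bricks/brick1/vol0'):
        g.log.info("Brick is offline; the volume stays available as long as "
                   "quorum is met")
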
 def test_expected_fail(self):
     """Testing an expected failure. This test should fail"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     rcode, _, _ = g.run(self.masternode, "false")
     self.assertEqual(rcode, 0)
    def test_limit_usage_deep_dir(self):
        # pylint: disable=too-many-statements
        """
        Verifying directory quota functionality with respect to the
        limit-usage option. Set limits on various directories [breadth]
        and check for the quota list of all the directories.

        * Enable Quota
        * Create 10 directories one inside the other and set limit of 1GB
          on each directory
        * Perform a quota list operation
        * Create some random amount of data inside each directory
        * Perform a quota list operation
        * Remove the quota limit and delete the data
        """
        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to enable quota on the volume %s",
                                  self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Create deep directories in the mount point
        for mount_object in self.mounts:
            g.log.info("Creating directories on %s:%s",
                       mount_object.client_system, mount_object.mountpoint)
            ret = mkdir(mount_object.client_system,
                        "%s/dir1/dir2/dir3/dir4/dir5/dir6/dir7/dir8/dir9/dir10"
                        % (mount_object.mountpoint), parents=True)
            self.assertTrue(ret, ("Failed to create dir under %s-%s",
                                  mount_object.client_system,
                                  mount_object.mountpoint))
            g.log.info("Successfully created deep directories on %s:%s",
                       mount_object.client_system, mount_object.mountpoint)

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )

        # Set soft timeout to 1 second
        g.log.info("Set quota soft timeout:")
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, '1sec')
        self.assertEqual(ret, 0, ("Failed to set soft timeout"))
        g.log.info("Quota soft timeout set successful")

        # Set hard timeout to 0 second
        g.log.info("Set quota hard timeout:")
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, '0sec')
        self.assertEqual(ret, 0, ("Failed to set hard timeout"))
        g.log.info("Quota hard timeout set successful")

        # Get dir list
        g.log.info('Getting dir list in %s', self.volname)
        cmd = ("ls -R %s | grep ':' | tr -d :" % self.mounts[0].mountpoint)
        ret, out, err = g.run(self.mounts[0].client_system, cmd)
        g.log.info('mountpoint %s', self.mounts[0].mountpoint)
        self.assertFalse(ret, err)
        dir_list = out.split()
        for dir_name in dir_list:
            # Parsed to remove the mount point as quota doesn't work when
            # passed with mountpoint.
            tmp_name = dir_name.replace(self.mounts[0].mountpoint, "")
            dir_list[dir_list.index(dir_name)] = '%s' % tmp_name
        # The first entry of ls -R is the current directory, which is not
        # needed, so drop it.
        dir_list.pop(0)

        # Set limit of 1 GB on every directory created inside the mountpoint
        g.log.info("Set Quota Limit on each directory of the volume %s",
                   self.volname)
        for dir_name in dir_list:
            ret, _, _ = quota_limit_usage(self.mnode, self.volname,
                                          dir_name, '1GB')
            self.assertFalse(ret, "Failed to set Quota for dir %s" %
                             dir_name)
            g.log.info("Set quota for dir %s successfully", dir_name)
        g.log.info("Successfully set the Quota limit on each path of the "
                   "volume %s", self.volname)

        # Validate quota on every Directory of the Volume
        g.log.info("Get Quota list for every directory on the volume %s",
                   self.volname)
        for dir_name in dir_list:
            ret = quota_validate(self.mnode, self.volname, path=dir_name,
                                 hard_limit=1073741824)
            self.assertTrue(ret, "Quota validate Failed for dir %s" %
                            dir_name)

        # Create some data inside each directory and do a quota validate
        self.all_mounts_procs = []
        for mount_object in self.mounts:
            g.log.info("Creating Files on %s:%s", mount_object.client_system,
                       mount_object.mountpoint)
            # Data creation
            # Creates one file of rand[0] size in each dir
            rand = random.sample([1, 10, 512], 1)
            cmd = ("/usr/bin/env python %s create_files "
                   "--fixed-file-size %sk %s/%s" % (
                       self.script_upload_path,
                       rand[0], mount_object.mountpoint, dir_list[0]))

            ret, _, _ = g.run(mount_object.client_system, cmd)
            self.assertFalse(ret, "Failed to create files")

            # quota_validate for each dir
            for dir_num, dir_name in enumerate(dir_list):
                # To calculate the dir usage for quota
                usage = (rand[0] * 1024) + \
                         ((len(dir_list) - (dir_num + 1)) * rand[0] * 1024)
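                # Worked example (illustrative only): with rand[0] == 10 and
                # 10 directories in dir_list, dir1 (dir_num == 0) is expected
                # to account for 10*1024 + 9*10*1024 = 102400 bytes, while the
                # deepest directory accounts only for its own 10*1024 bytes,
                # because quota usage is cumulative down the directory tree.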
                if usage >= 1073741824:
                    raise ExecutionError("usage crossed hardlimit")
                ret = quota_validate(self.mnode, self.volname, path=dir_name,
                                     hard_limit=1073741824, used_space=usage)
                self.assertTrue(ret, "Quota validate Failed for dir %s" %
                                dir_name)
                g.log.info("Quota list validate  and file created successful "
                           "for %s", dir_name)
            g.log.info("Files created and quota validated successfully")

        # Deleting data and validating quota
        self.all_mounts_procs = []
        # Deleting deep directories in the mount point
        for mount_object in self.mounts:
            ret = rmdir(mount_object.client_system, "%s/dir1/dir2" %
                        (mount_object.mountpoint), force=True)
            self.assertTrue(ret, ("Failed to delete dir under %s/dir1/dir2"
                                  % (mount_object.mountpoint)))
            g.log.info("Successfully deleted deep directories")
            # Quota validate
            # converting into bytes
            usage = (rand[0] * 1024)
            ret = quota_validate(self.mnode, self.volname,
                                 path=dir_list[0],
                                 used_space=usage)
            self.assertTrue(ret, "Quota validate Failed for dir /dir1")
            g.log.info("Quota list validate successful for /dir1")

        # Remove Quota limit
        g.log.info("Get Quota list for every directory on the volume %s",
                   self.volname)
        ret = quota_remove(self.mnode, self.volname, path=dir_list[0])
        self.assertTrue(ret, "Failed to remove Quota for dir %s" % dir_list[0])
        g.log.info("Quota removed for dir %s successfully", dir_list[0])
    def test_subdir_with_quotaobject(self):

        # pylint: disable=too-many-statements
        """
        Mount the volume
        Create 1 subdir on mountpoint "d1"
        unmount volume
        Auth allow - Client1(d1),Client2(full volume)
        Mount the subdir "d1" on client1 and volume on client2
        Enable quota on volume
        Set quota object limit on subdir "d1" and volume
        subdir "d1" quota limit- 50
        Volume quota limit - 200
        Start writing 49 files on both subdir "d1" and volume
        Fetch quota limit object list
        Write 1 more file on subdir. This should fail
        Again reset quota object limit to 75 now on subdir "d1"
        Create 24 directories on subdir and volume. This should pass
        Fetch quota limit object list
        Create 1 more directory on subdir. This should fail
        Create 1 more directory on volume. This should pass
        """
        # Create  directory d1 on mount point
        ret = mkdir(self.mounts[0].client_system,
                    "%s/d1" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'd1' on "
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        # unmount volume
        ret = self.unmount_volume(self.mounts)
        self.assertTrue(ret, "Volumes Unmount failed")
        g.log.info("Volumes Unmounted successfully")

        # Set authentication on the subdirectory "d1" to be accessed by
        # client1 and on the volume to be accessed by client2
        g.log.info(
            'Setting authentication on subdirectory d1 to access '
            'by client %s and on volume to access by client %s',
            self.clients[0], self.clients[1])
        ret = set_auth_allow(self.volname, self.mnode, {
            '/d1': [self.clients[0]],
            '/': [self.clients[1]]
        })
        self.assertTrue(
            ret, 'Failed to set Authentication on volume %s' % self.volname)

        # Creating mount list for mounting subdir mount and volume
        self.subdir_mounts = [
            copy.deepcopy(self.mounts[0]),
            copy.deepcopy(self.mounts[1])
        ]
        self.subdir_mounts[0].volname = "%s/d1" % self.volname

        # Mount Subdirectory d1 on client 1 and volume on client 2
        for mount_obj in self.subdir_mounts:
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Failed to mount  %s on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Successfully mounted %s on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully mounted sub directory and volume to "
                   "authenticated clients")

        # Enable quota on volume
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to enable quota on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Check if quota is enabled
        g.log.info("Validate Quota is enabled on the volume %s", self.volname)
        ret = is_quota_enabled(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Quota is not enabled on the volume %s", self.volname))
        g.log.info("Successfully Validated quota is enabled on volume %s",
                   self.volname)

        # Set quota-soft-timeout to 0
        g.log.info("Setting up soft timeout to 0")
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, "0")
        self.assertEqual(ret, 0, ("Failed to set quota-soft-timeout"))
        g.log.info("Successfully set the quota-soft-timeout")

        # Set quota-hard-timeout to 0
        g.log.info("Setting up hard timeout with 0")
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, "0")
        self.assertEqual(ret, 0, ("Failed to set quota-hard-timeout"))
        g.log.info("successfully set the quota-hard-timeout")

        # Set Quota object limit on the subdir "d1" and on volume
        for mount_obj in self.subdir_mounts:
            if mount_obj.volname == "%s/d1" % self.volname:
                path1 = "/d1"
                limit = "50"
            else:
                path1 = "/"
                limit = "200"
            g.log.info("Set Quota Limit on the path %s of the volume %s",
                       path1, self.volname)
            ret, _, _ = quota_limit_objects(self.mnode, self.volname, path1,
                                            limit)
            self.assertEqual(ret, 0,
                             ("Failed to set quota limit on path "
                              "%s of the volume %s", path1, self.volname))
            g.log.info(
                "Successfully set the quota limit on %s of the volume "
                "%s", path1, self.volname)

        # Create 49 files each on the subdir mount and the volume mount
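        # Note: gluster's quota object limit counts the directory itself as
        # one object, so with an object limit of 50 on /d1 the 49 files below
        # are expected to fill the limit and the very next create should fail.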
        for mount_object in self.subdir_mounts:
            g.log.info("Creating Files on %s:%s", mount_object.client_system,
                       mount_object.mountpoint)
            cmd = ("cd %s ; for i in `seq 1 49` ;"
                   "do touch $i;done " % (mount_object.mountpoint))
            ret, _, _ = g.run(mount_object.client_system, cmd)
            self.assertEqual(ret, 0, "Failed to create files on mountpoint")
            g.log.info("Files created successfully on mountpoint")

        # Fetch Quota List object on the volume
        g.log.info("Get Quota list on the volume %s", self.volname)
        quota_list = quota_fetch_list_objects(self.mnode, self.volname)

        self.assertIsNotNone(quota_list, ("Failed to get the quota list "
                                          "of the volume %s", self.volname))

        # Create 1 file on subdir to check if the quota limit is
        # enforced on subdir d1
        g.log.info("Creating File on %s:%s", self.clients[0],
                   self.subdir_mounts[0].mountpoint)
        cmd = ("cd %s ; touch test " % (self.subdir_mounts[0].mountpoint))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertNotEqual(ret, 0, ("File creation was expected to Fail."
                                     "But it got passed"))
        g.log.info(
            "File creation failed as expected on %s:%s as quota"
            " limit reached already", self.clients[0],
            self.subdir_mounts[0].mountpoint)

        # Modify quota object limit for subdir from 50 to 75
        path1 = "/d1"
        g.log.info("Set Quota Limit on the path %s of the volume %s", path1,
                   self.volname)
        ret, _, _ = quota_limit_objects(self.mnode, self.volname, path1, "75")
        self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of "
                                  " the volume %s", path1, self.volname))
        g.log.info(
            "Successfully set the quota limit on %s of the volume "
            "%s", path1, self.volname)

        # Create 25 directories on both subdir mount "d1" and volume mount
        for mount_object in self.subdir_mounts:
            g.log.info("Creating directories on %s:%s",
                       mount_object.client_system, mount_object.mountpoint)
            for i in range(0, 25):
                ret = mkdir(mount_object.client_system,
                            "%s/dir%s" % (mount_object.mountpoint, i),
                            parents=True)
                self.assertTrue(ret, "Failed to create directories"
                                "on mountpoint")
                g.log.info("Directories created successfully on mountpoint")

        # Get Quota List on the volume
        g.log.info("Get Quota list on the volume %s", self.volname)
        quota_list = quota_fetch_list_objects(self.mnode, self.volname)
        self.assertIsNotNone(quota_list, ("Failed to get the quota list "
                                          "of the volume %s", self.volname))

        # Create 1 directory on subdir "d1" and volume to check if the quota
        # limit is enforced on subdir d1 and the volume
        for mount_object in self.subdir_mounts:
            g.log.info("Creating directory on %s:%s",
                       mount_object.client_system, mount_object.mountpoint)
            ret = mkdir(mount_object.client_system,
                        "%s/dirTest" % mount_object.mountpoint,
                        parents=True)
            if mount_object.volname == "%s/d1" % self.volname:
                self.assertFalse(
                    ret, "Directory creation was expected "
                    "to fail, but it succeeded")
                g.log.info("Directory creation failed as expected on "
                           "subdir d1")
            else:
                self.assertTrue(ret, "Directory creation got failed"
                                "on volume")
                g.log.info("Direction creation successful  on volume")
    def setUpClass(cls):
        """Setup nfs-ganesha cluster
        tests.
        """

        # Check if gdeploy is installed on glusto-tests management node.
        ret, _, _ = g.run_local("gdeploy --version")
        if ret != 0:
            raise ConfigError("Please install gdeploy to run the scripts")

        GlusterBaseClass.setUpClass.im_func(cls)

        # Check if enable_nfs_ganesha is set in config file
        if not cls.enable_nfs_ganesha:
            raise ConfigError("Please enable nfs ganesha in config")

        # Read num_of_nfs_ganesha_nodes from config file and create
        # nfs ganesha cluster accordingly
        cls.num_of_nfs_ganesha_nodes = int(cls.num_of_nfs_ganesha_nodes)
        cls.servers_in_nfs_ganesha_cluster = (
            cls.servers[:cls.num_of_nfs_ganesha_nodes])
        cls.vips_in_nfs_ganesha_cluster = (
            cls.vips[:cls.num_of_nfs_ganesha_nodes])

        # Create nfs ganesha cluster if not exists already
        if (is_nfs_ganesha_cluster_exists(
         cls.servers_in_nfs_ganesha_cluster[0])):
            if is_nfs_ganesha_cluster_in_healthy_state(
             cls.servers_in_nfs_ganesha_cluster[0]):
                g.log.info("Nfs-ganesha Cluster exists and is in healthy "
                           "state. Skipping cluster creation...")
            else:
                g.log.info("Nfs-ganesha Cluster exists and is not in "
                           "healthy state.")
                g.log.info("Tearing down existing cluster which is not in "
                           "healthy state")
                ganesha_ha_file = ("/var/run/gluster/shared_storage/"
                                   "nfs-ganesha/ganesha-ha.conf")

                g.log.info("Collecting server details of existing "
                           "nfs ganesha cluster")
                conn = g.rpyc_get_connection(
                    cls.servers_in_nfs_ganesha_cluster[0], user="root")
                if conn is None:
                    tmp_node = cls.servers_in_nfs_ganesha_cluster[0]
                    raise ExecutionError("Unable to get connection to 'root' "
                                         " of node %s "
                                         % tmp_node)
                if not conn.modules.os.path.exists(ganesha_ha_file):
                    raise ExecutionError("Unable to locate %s"
                                         % ganesha_ha_file)
                with conn.builtin.open(ganesha_ha_file, "r") as fh:
                    ganesha_ha_contents = fh.read()
                g.rpyc_close_connection(
                    host=cls.servers_in_nfs_ganesha_cluster[0], user="root")
                servers_in_existing_cluster = re.findall(r'VIP_(.*)\=.*',
                                                         ganesha_ha_contents)

                ret = teardown_nfs_ganesha_cluster(
                    servers_in_existing_cluster, force=True)
                if not ret:
                    raise ExecutionError("Failed to teardown nfs "
                                         "ganesha cluster")
                g.log.info("Existing cluster got teardown successfully")
                g.log.info("Creating nfs-ganesha cluster of %s nodes"
                           % str(cls.num_of_nfs_ganesha_nodes))
                g.log.info("Nfs-ganesha cluster node info: %s"
                           % cls.servers_in_nfs_ganesha_cluster)
                g.log.info("Nfs-ganesha cluster vip info: %s"
                           % cls.vips_in_nfs_ganesha_cluster)
                ret = create_nfs_ganesha_cluster(
                    cls.servers_in_nfs_ganesha_cluster,
                    cls.vips_in_nfs_ganesha_cluster)
                if not ret:
                    raise ExecutionError("Failed to create "
                                         "nfs-ganesha cluster")
        else:
            g.log.info("Creating nfs-ganesha cluster of %s nodes"
                       % str(cls.num_of_nfs_ganesha_nodes))
            g.log.info("Nfs-ganesha cluster node info: %s"
                       % cls.servers_in_nfs_ganesha_cluster)
            g.log.info("Nfs-ganesha cluster vip info: %s"
                       % cls.vips_in_nfs_ganesha_cluster)
            ret = create_nfs_ganesha_cluster(
                cls.servers_in_nfs_ganesha_cluster,
                cls.vips_in_nfs_ganesha_cluster)
            if not ret:
                raise ExecutionError("Failed to create "
                                     "nfs-ganesha cluster")

        if is_nfs_ganesha_cluster_in_healthy_state(
         cls.servers_in_nfs_ganesha_cluster[0]):
            g.log.info("Nfs-ganesha Cluster exists is in healthy state")
        else:
            raise ExecutionError("Nfs-ganesha Cluster setup Failed")

        ret = set_nfs_ganesha_client_configuration(cls.clients)
        if not ret:
            raise ExecutionError("Failed to do client nfs ganesha "
                                 "configuration")

        for server in cls.servers:
            for client in cls.clients:
                cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then "
                       "echo \"%s %s\" >> /etc/hosts; fi"
                       % (client, socket.gethostbyname(client), client))
                ret, _, _ = g.run(server, cmd)
                if ret != 0:
                    g.log.error("Failed to add entry of client %s in "
                                "/etc/hosts of server %s"
                                % (client, server))

        for client in cls.clients:
            for server in cls.servers:
                cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then "
                       "echo \"%s %s\" >> /etc/hosts; fi"
                       % (server, socket.gethostbyname(server), server))
                ret, _, _ = g.run(client, cmd)
                if ret != 0:
                    g.log.error("Failed to add entry of server %s in "
                                "/etc/hosts of client %s"
                                % (server, client))
    def test_gfid_split_brain_resolution(self):
        """
        - Create gfid split-brain of files and resolve them using the
          source-brick option of the CLI.
        """

        # pylint: disable=too-many-statements
        # pylint: disable=too-many-locals

        # Disable all self-heals and client-quorum
        options = {"self-heal-daemon": "off",
                   "data-self-heal": "off",
                   "metadata-self-heal": "off",
                   "entry-self-heal": "off",
                   "cluster.quorum-type": "none"}
        g.log.info("setting volume options %s", options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Create dir inside which I/O will be performed.
        ret = mkdir(self.mounts[0].client_system, "%s/test_gfid_split_brain"
                    % self.mounts[0].mountpoint)
        self.assertTrue(ret, "mkdir failed")

        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:", num_subvols)

        # Toggle bricks and perform I/O
        file_list = ["file1.txt", "file2.txt", "file3.txt", "file4.txt",
                     "file5.txt", "file6.txt", "file7.txt", "file8.txt",
                     "file9.txt", "file10.txt"]
        brick_index = 0
        offline_bricks = []
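        # Each pass of the outer loop takes a different pair of bricks in
        # every replica-3 subvolume offline (indices brick_index % 3 and
        # (brick_index + 1) % 3) and writes the same file names from the
        # mount, so each brick ends up with its own gfid for these files,
        # creating the gfid split-brain this test needs.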
        for _ in range(0, 3):
            for i in range(0, num_subvols):
                subvol_brick_list = subvols_dict['volume_subvols'][i]
                offline_bricks.append(subvol_brick_list[brick_index % 3])
                offline_bricks.append(subvol_brick_list[(brick_index+1) % 3])
            self.toggle_bricks_and_perform_io(file_list, offline_bricks)
            brick_index += 1
            offline_bricks[:] = []

        # Enable shd
        g.log.info("enabling the self heal daemon")
        ret = enable_self_heal_daemon(self.mnode, self.volname)
        self.assertTrue(ret, "failed to enable self heal daemon")
        g.log.info("Successfully enabled the self heal daemon")

        # Wait for self heal processes to come online
        g.log.info("Wait for selfheal process to come online")
        timeout = 300
        ret = wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname,
                                                      timeout)
        self.assertTrue(ret, "Self-heal process are not online")
        g.log.info("All self heal process are online")

        # Trigger heal
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Starting heal failed')
        g.log.info('Index heal launched')

        # checking if file is in split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertTrue(ret, "Files are not in split-brain as expected.")
        g.log.info("Files are still in split-brain")

        # First brick of each replica will be used as source-brick
        first_brick_list = []
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            brick = subvol_brick_list[0]
            first_brick_list.append(brick)

        # Find which dht subvols the 10 files are present in and trigger heal
        for filename in file_list:
            fpath = self.mounts[0].mountpoint + "/test_gfid_split_brain/" + \
                    filename
            gfile = GlusterFile(self.clients[0], fpath)
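            # gfile.hashed_bricks lists the bricks this file name hashes to
            # under DHT; the matching entry from first_brick_list is then
            # used as the source brick for the CLI based resolution.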
            for brick in first_brick_list:
                _, brick_path = brick.split(':')
                match = [brick for item in gfile.hashed_bricks if brick_path
                         in item]
                if match:
                    self.resolve_gfid_split_brain("/test_gfid_split_brain/" +
                                                  filename, brick)

        # Trigger heal to complete pending data/metadata heals
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Starting heal failed')
        g.log.info('Index heal launched')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Get arequals and compare
        for i in range(0, num_subvols):
            # Get arequal for first brick
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            node, brick_path = subvol_brick_list[0].split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan'
                       % brick_path)
            ret, arequal, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get arequal on brick %s'
                             % subvol_brick_list[0])
            first_brick_total = arequal.splitlines()[-1].split(':')[-1]

            # Get arequal for every brick and compare with first brick
            for brick in subvol_brick_list[1:]:
                node, brick_path = brick.split(':')
                command = ('arequal-checksum -p %s '
                           '-i .glusterfs -i .landfill -i .trashcan'
                           % brick_path)
                ret, brick_arequal, _ = g.run(node, command)
                self.assertFalse(ret,
                                 'Failed to get arequal on brick %s'
                                 % brick)
                g.log.info('Getting arequal for %s is successful', brick)
                brick_total = brick_arequal.splitlines()[-1].split(':')[-1]

                self.assertEqual(first_brick_total, brick_total,
                                 'Arequals for subvol and %s are not equal'
                                 % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
    def test_heal_when_dir_quota_exceeded(self):
        # Create a directory to set the quota_limit_usage
        path = "/dir"
        g.log.info("Creating a directory")
        self.all_mounts_procs = []
        for mount_object in self.mounts:
            cmd = "/usr/bin/env python %s create_deep_dir -d 0 -l 0 %s%s" % (
                self.script_upload_path, mount_object.mountpoint, path)
            ret, _, _ = g.run(mount_object.client_system, cmd)
            self.assertEqual(ret, 0,
                             "Failed to create directory on mountpoint")
            g.log.info("Directory created successfully on mountpoint")

        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to enable quota on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Set quota-soft-timeout to 0
        g.log.info("Setting up soft timeout to 0")
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, "0")
        self.assertEqual(ret, 0, ("Failed to set quota-soft-timeout"))
        g.log.info("Successfully set the quota-soft-timeout")

        # Set quota-hard-timeout to 0
        g.log.info("Setting up hard timeout with 0")
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, "0")
        self.assertEqual(ret, 0, ("Failed to set quota-hard-timeout"))
        g.log.info("successfully set the quota-hard-timeout")

        # Set Quota limit on the newly created directory
        g.log.info("Set Quota Limit on the path %s of the volume %s", path,
                   self.volname)
        ret, _, _ = quota_limit_usage(self.mnode,
                                      self.volname,
                                      path=path,
                                      limit="1GB")
        self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of "
                                  " the volume %s", path, self.volname))
        g.log.info(
            "Successfully set the Quota limit on %s of the volume "
            "%s", path, self.volname)

        # Create 2 files of size 400MB inside the directory
        for mount_object in self.mounts:
            g.log.info("Creating Files on %s:%s", mount_object.client_system,
                       path)
            cmd = ("cd %s%s && for i in `seq 1 2` ;"
                   "do dd if=/dev/urandom of=file$i bs=20M "
                   "count=20; done" % (mount_object.mountpoint, path))
            ret, _, _ = g.run(mount_object.client_system, cmd)
            self.assertEqual(ret, 0, ("Failed to create files on %s", path))
            g.log.info("Files created successfully on mountpoint")

        bricks_list = get_all_bricks(self.mnode, self.volname)

        # Bring brick2 offline
        g.log.info('Bringing bricks %s offline', bricks_list[2])
        ret = bring_bricks_offline(self.volname, bricks_list[2])
        self.assertTrue(ret,
                        'Failed to bring bricks %s offline' % bricks_list[2])

        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[2]])
        self.assertTrue(ret, 'Brick %s is not offline' % bricks_list[2])
        g.log.info('Bringing brick %s offline is successful', bricks_list[2])

        # Create a file of size 500MB inside the directory; it should fail
        # as the quota limit is exceeded
        cmd = ("cd %s%s && dd if=/dev/urandom of=file3 bs=20M count=25" %
               (mount_object.mountpoint, path))
        ret, _, _ = g.run(mount_object.client_system, cmd)
        self.assertEqual(ret, 1, ("Writing a file of 500MB succeeded while "
                                  "it was not supposed to."))
        g.log.info("Writing a file of size 500MB failed as expected "
                   "due to quota limit on the directory.")

        # Bring brick2 online and check status
        g.log.info('Bringing brick %s online...', bricks_list[2])
        ret = bring_bricks_online(self.mnode, self.volname, [bricks_list[2]])
        self.assertTrue(ret,
                        'Failed to bring brick %s online' % bricks_list[2])
        g.log.info('Bringing brick %s online is successful', bricks_list[2])

        g.log.info("Verifying if brick %s is online", bricks_list[2])
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, ("Brick %s did not come up", bricks_list[2]))
        g.log.info("Brick %s has come online.", bricks_list[2])

        # Trigger heal
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Starting heal failed')
        g.log.info('Index heal launched')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')
    def test_afr_gfid_heal(self):

        """
        Description: This test case runs split-brain resolution
                     on 5 files in split-brain on a 1x2 volume.
                     After resolving split-brain, it makes sure that
                     split-brain resolution does not work on files
                     that are no longer in split-brain.
        """

        g.log.info("disabling the self heal daemon")
        ret = disable_self_heal_daemon(self.mnode, self.volname)
        self.assertTrue(ret, "unable to disable self heal daemon")
        g.log.info("Successfully disabled the self heal daemon")

        # getting list of all bricks
        all_bricks = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(all_bricks, "failed to get list of bricks")
        g.log.info("bringing down brick1")
        ret = bring_bricks_offline(self.volname, all_bricks[0:1])
        self.assertTrue(ret, "unable to bring brick1 offline")
        g.log.info("Successfully brought the following brick offline "
                   ": %s", str(all_bricks[0]))
        g.log.info("verifying if brick1 is offline")
        ret = are_bricks_offline(self.mnode, self.volname, all_bricks[0:1])
        self.assertTrue(ret, "brick1 is still online")
        g.log.info("verified: brick1 is offline")

        g.log.info("creating 5 files from mount point")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_files -f 5 "
                   "--base-file-name test_file --fixed-file-size 1k %s" % (
                       self.script_upload_path,
                       mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
        # Validate I/O
        g.log.info("Wait for IO to complete and validate IO.....")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")
        g.log.info("Successfully created a file from mount point")

        g.log.info("bringing brick 1 back online")
        ret = bring_bricks_online(self.mnode, self.volname, all_bricks[0:1])
        self.assertIsNotNone(ret, "unable to bring brick 1 online")
        g.log.info("Successfully brought the following brick online "
                   ": %s", str(all_bricks[0]))
        g.log.info("verifying if brick1 is online")
        ret = are_bricks_online(self.mnode, self.volname, all_bricks[0:1])
        self.assertTrue(ret, "brick1 is not online")
        g.log.info("verified: brick1 is online")

        g.log.info("bringing down brick2")
        ret = bring_bricks_offline(self.volname, all_bricks[1:2])
        self.assertTrue(ret, "unable to bring brick2 offline")
        g.log.info("Successfully brought the following brick offline "
                   ": %s", str(all_bricks[1]))
        g.log.info("verifying if brick2 is offline")
        ret = are_bricks_offline(self.mnode, self.volname, all_bricks[1:2])
        self.assertTrue(ret, "brick2 is still online")
        g.log.info("verified: brick2 is offline")

        g.log.info("creating 5 new files of same name from mount point")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_files -f 5 "
                   "--base-file-name test_file --fixed-file-size 10k %s" % (
                       self.script_upload_path,
                       mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
        # Validate I/O
        g.log.info("Wait for IO to complete and validate IO.....")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")
        g.log.info("Successfully created a new file of same name "
                   "from mount point")

        g.log.info("bringing brick2 back online")
        ret = bring_bricks_online(self.mnode, self.volname, all_bricks[1:2])
        self.assertIsNotNone(ret, "unable to bring brick2 online")
        g.log.info("Successfully brought the following brick online "
                   ": %s", str(all_bricks[1]))
        g.log.info("verifying if brick2 is online")
        ret = are_bricks_online(self.mnode, self.volname, all_bricks[1:2])
        self.assertTrue(ret, "brick2 is not online")
        g.log.info("verified: brick2 is online")

        g.log.info("enabling the self heal daemon")
        ret = enable_self_heal_daemon(self.mnode, self.volname)
        self.assertTrue(ret, "failed to enable self heal daemon")
        g.log.info("Successfully enabled the self heal daemon")

        g.log.info("checking if volume is in split-brain")
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertTrue(ret, "unable to create split-brain scenario")
        g.log.info("Successfully created split brain scenario")

        g.log.info("resolving split-brain by choosing first brick as "
                   "the source brick")
        node, brick_path = all_bricks[0].split(':')
        for fcount in range(5):
            command = ("gluster v heal " + self.volname + " split-brain "
                       "source-brick " + all_bricks[0] + ' /test_file' +
                       str(fcount) + '.txt')
            ret, _, _ = g.run(node, command)
            self.assertEqual(ret, 0, "command execution not successful")
        # triggering heal
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, "heal not triggered")
        g.log.info("Successfully triggered heal")
        # waiting for heal to complete
        ret = monitor_heal_completion(self.mnode, self.volname,
                                      timeout_period=240)
        self.assertTrue(ret, "heal not completed")
        g.log.info("Heal completed successfully")
        # checking if any file is in split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, "file still in split-brain")
        g.log.info("Successfully resolved split brain situation using "
                   "CLI based resolution")

        g.log.info("resolving split-brain on a file not in split-brain")
        node, brick_path = all_bricks[0].split(':')
        command = ("gluster v heal " + self.volname + " split-brain "
                   "source-brick " + all_bricks[1] + " /test_file0.txt")
        ret, _, _ = g.run(node, command)
        self.assertNotEqual(ret, 0, "Unexpected: split-brain resolution "
                                    "command is successful on a file which"
                                    " is not in split-brain")
        g.log.info("Expected: split-brian resolution command failed on "
                   "a file which is not in split-brain")

        g.log.info("checking the split-brain status of each file")
        for fcount in range(5):
            fpath = (self.mounts[0].mountpoint + '/test_file' +
                     str(fcount) + '.txt')
            status = get_fattr(self.mounts[0].client_system,
                               fpath, 'replica.split-brain-status')
            compare_string = ("The file is not under data or metadata "
                              "split-brain")
            self.assertEqual(status.rstrip('\x00'), compare_string,
                             "file test_file%s is under"
                             " split-brain" % str(fcount))
        g.log.info("none of the files are under split-brain")
def is_nfs_ganesha_cluster_in_failover_state(mnode, failed_nodes):
    """
       Checks whether nfs ganesha cluster is in failover state.

    Args:
        mnode (str): Node on which the command will
            be executed.
        failed_nodes (list): Nodes on which the nfs-ganesha
            process is down.

    Returns:
        bool : True if nfs ganesha cluster is in failover state.
            False otherwise

    Example:
        is_nfs_ganesha_cluster_in_failover_state(mnode, failed_nodes)
    """

    cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --status " +
           "/run/gluster/shared_storage/nfs-ganesha/ | grep " +
           " 'Cluster HA Status' | cut -d ' ' -f 4 ")

    retcode, stdout, _ = g.run(mnode, cmd)
    if retcode != 0:
        g.log.error("Failed to execute nfs-ganesha status command to check "
                    "if cluster is in failover state")
        return False

    if stdout.strip('\n') != "FAILOVER":
        g.log.error("nfs-ganesha cluster is not in failover state. Current "
                    "cluster state: %s " % stdout)
        return False

    cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --status " +
           "/run/gluster/shared_storage/nfs-ganesha/ | grep -v" +
           " 'Online' | grep -v 'Cluster' | cut -d ' ' -f 1 | " +
           "sed s/'-cluster_ip-1'//g")

    retcode, stdout, _ = g.run(mnode, cmd)
    if retcode != 0:
        g.log.error("Failed to execute nfs-ganesha status command to parse "
                    "for the cluster resources")
        return False

    cluster_list = stdout.split("\n")
    cluster_list = list(filter(None, cluster_list))

    cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --status " +
           "/run/gluster/shared_storage/nfs-ganesha/ | grep -v" +
           " 'Online' | grep -v 'Cluster' | cut -d ' ' -f 2 | " +
           "sed s/'-cluster_ip-1'//g")

    retcode, stdout, _ = g.run(mnode, cmd)
    if retcode != 0:
        g.log.error("Failed to execute nfs-ganesha status command to parse "
                    "for the hostnames in cluster")
        return False

    host_list = stdout.split("\n")
    host_list = list(filter(None, host_list))
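    # cluster_list holds the node each VIP resource originally belongs to and
    # host_list holds the node currently hosting that resource; for a clean
    # failover every failed node's resource must be hosted by another node.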

    ret = True
    for cluster_node, host_node in zip(cluster_list, host_list):
        if cluster_node in failed_nodes:
            if cluster_node == host_node:
                g.log.error("failover status: failed node %s isn't taken over"
                            " by other node in nfs-ganesha cluster" %
                            cluster_node)
                ret = False
            else:
                g.log.info("failover status: failed node %s is successfully "
                           "failovered to node %s" %
                           (cluster_node, host_node))
        else:
            if cluster_node != host_node:
                g.log.error("Unexpected. Other nodes are in failover state. "
                            "Node %s is takenover by node %s in nfs-ganesha "
                            "cluster" % (cluster_node, host_node))
                ret = False
    return ret
Beispiel #37
0
def rebalance_stop_and_get_status(mnode, volname):
    """Parse the output of 'gluster vol rebalance stop' command
       for the given volume

    Args:
        mnode (str): Node on which command has to be executed.
        volname (str): volume name

    Returns:
        NoneType: None on command execution failure or parse errors.
        dict: dict on success. rebalance status will be
            in dict format

    Examples:
        >>> rebalance_stop_and_get_status('abc.xyz.com', testvol)
        {'node': [{'files': '0', 'status': '3', 'lookups': '0', 'skipped': '0',
        'nodeName': 'localhost', 'failures': '0', 'runtime': '0.00', 'id':
        '11336017-9561-4e88-9ac3-a94d4b403340', 'statusStr': 'completed',
        'size': '0'}, {'files': '0', 'status': '1', 'lookups': '0', 'skipped':
        '0', 'nodeName': '10.70.47.16', 'failures': '0', 'runtime': '0.00',
        'id': 'a2b88b10-eba2-4f97-add2-8dc37df08b27', 'statusStr':
        'in progress', 'size': '0'}, {'files': '0', 'status': '3',
        'lookups': '0', 'skipped': '0', 'nodeName': '10.70.47.152',
        'failures': '0', 'runtime': '0.00', 'id':
        'b15b8337-9f8e-4ec3-8bdb-200d6a67ae12', 'statusStr': 'completed',
        'size': '0'}, {'files': '0', 'status': '3', 'lookups': '0', 'skipped':
        '0', 'nodeName': '10.70.46.52', 'failures': '0', 'runtime': '0.00',
        'id': '77dc299a-32f7-43d8-9977-7345a344c398', 'statusStr': 'completed',
        'size': '0'}], 'task-id': 'a16f99d1-e165-40e7-9960-30508506529b',
        'aggregate': {'files': '0', 'status': '1', 'lookups': '0', 'skipped':
        '0', 'failures': '0', 'runtime': '0.00', 'statusStr': 'in progress',
        'size': '0'}, 'nodeCount': '4', 'op': '3'}
    """

    cmd = "gluster volume rebalance %s stop --xml" % volname
    ret, out, _ = g.run(mnode, cmd)
    if ret != 0:
        g.log.error(
            "Failed to execute 'rebalance stop' on node %s. "
            "Hence failed to parse the rebalance status.", mnode)
        return None

    try:
        root = etree.XML(out)
    except etree.ParseError:
        g.log.error("Failed to parse gluster rebalance stop xml output.")
        return None

    rebal_status = {}
    rebal_status["node"] = []
    for info in root.findall("volRebalance"):
        for element in info.getchildren():
            if element.tag == "node":
                status_info = {}
                for elmt in element.getchildren():
                    status_info[elmt.tag] = elmt.text
                rebal_status[element.tag].append(status_info)
            elif element.tag == "aggregate":
                status_info = {}
                for elmt in element.getchildren():
                    status_info[elmt.tag] = elmt.text
                rebal_status[element.tag] = status_info
            else:
                rebal_status[element.tag] = element.text
    return rebal_status
def create_nfs_ganesha_cluster(servers, vips):
    """
    Creating a ganesha HA cluster

    Args:
        servers(list): Hostname of ganesha nodes
        vips(list): VIPs to be assigned, one per node
    Returns:
        True(bool): If configuration of the ganesha cluster succeeds
        False(bool): If configuration of the ganesha cluster fails
    """
    # pylint: disable=too-many-return-statements
    ganesha_mnode = servers[0]

    # Configure ports in ganesha servers
    g.log.info("Defining statd service ports")
    ret = configure_ports_on_servers(servers)
    if not ret:
        g.log.error("Failed to set statd service ports on nodes.")
        return False

    # Firewall settings for nfs-ganesha
    ret = ganesha_server_firewall_settings(servers)
    if not ret:
        g.log.error("Firewall settings for nfs ganesha has failed.")
        return False
    g.log.info("Firewall settings for nfs ganesha was success.")

    # Do peer probe if not already done
    ret = peer_probe_servers(ganesha_mnode, servers, validate=True)
    if not ret:
        g.log.error("Peer probe failed")
        return False

    # Enable shared storage if not present
    ret, _, _ = g.run(ganesha_mnode,
                      "gluster v list | grep 'gluster_shared_storage'")
    if ret != 0:
        if not enable_shared_storage(ganesha_mnode):
            g.log.error("Failed to enable shared storage")
            return False
        g.log.info("Enabled gluster shared storage.")
    else:
        g.log.info("Shared storage is already enabled.")

    # Enable the glusterfssharedstorage.service and nfs-ganesha service
    for server in servers:
        cmd = "systemctl enable glusterfssharedstorage.service"
        ret, _, _ = g.run(server, cmd)
        if ret != 0:
            g.log.error("Failed to enable glusterfssharedstorage.service "
                        "on %s", server)
            return False

        ret, _, _ = g.run(server, "systemctl enable nfs-ganesha")
        if ret != 0:
            g.log.error("Failed to enable nfs-ganesha service on %s", server)
            return False

    # Password less ssh for nfs
    ret = create_nfs_passwordless_ssh(ganesha_mnode, servers)
    if not ret:
        g.log.error("Password less ssh between nodes failed.")
        return False
    g.log.info("Password less ssh between nodes successful.")

    # Create ganesha-ha.conf file
    tmp_ha_conf = "/tmp/ganesha-ha.conf"
    create_ganesha_ha_conf(servers, vips, tmp_ha_conf)

    # Check whether ganesha-ha.conf file is created
    if not os.path.isfile(tmp_ha_conf):
        g.log.error("Failed to create ganesha-ha.conf")
        return False

    # Cluster auth setup
    ret = cluster_auth_setup(servers)
    if not ret:
        g.log.error("Failed to configure cluster services")
        return False

    # Create nfs-ganesha directory in shared storage
    dpath = '/var/run/gluster/shared_storage/nfs-ganesha'
    mkdir(ganesha_mnode, dpath)

    # Copy the config files to shared storage
    cmd = 'cp -p /etc/ganesha/ganesha.conf %s/' % dpath
    ret, _, _ = g.run(ganesha_mnode, cmd)
    if ret != 0:
        g.log.error("Failed to copy ganesha.conf to %s/", dpath)
        return False

    g.upload(ganesha_mnode, tmp_ha_conf, '%s/' % dpath)

    # Create backup of ganesha-ha.conf file in ganesha_mnode
    g.upload(ganesha_mnode, tmp_ha_conf, '/etc/ganesha/')

    # Enabling ganesha
    g.log.info("Enable nfs-ganesha")
    ret, _, _ = enable_nfs_ganesha(ganesha_mnode)

    if ret != 0:
        g.log.error("Failed to enable ganesha")
        return False

    g.log.info("Successfully created ganesha cluster")

    # pcs status output
    _, _, _ = g.run(ganesha_mnode, "pcs status")

    return True
Beispiel #39
0
    def test_entry_heal_with_quota(self):
        """
        - Create a 1x3 volume
        - Set quota object limit
        - Create files less than the limit
        - Bring down a brick and create more files until limit is hit
        - Delete one file so that we are below the limit, and create one more
          file
        - Bring the brick back up and launch heal
        - Verify that after heal is complete, the deleted file does not
          re-appear in any of the bricks.
        """
        # pylint: disable=too-many-statements
        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Check if quota is enabled
        g.log.info("Validate Quota is enabled on the volume %s", self.volname)
        ret = is_quota_enabled(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Quota is not enabled on the volume %s", self.volname))
        g.log.info("Successfully Validated quota is enabled on volume %s",
                   self.volname)

        # Set quota related options
        options = {
            "quota-deem-statfs": "on",
            "soft-timeout": "0",
            "hard-timeout": "0"
        }
        g.log.info("setting quota volume options %s", options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Create directory on mount
        ret = mkdir(self.mounts[0].client_system,
                    "%s/dir" % self.mounts[0].mountpoint)
        self.assertTrue(ret, "mkdir failed")

        # Set Quota limit on the directory
        path = "/dir"
        g.log.info(
            "Setting Quota Limit object on the path %s of the "
            "volume %s", path, self.volname)
        ret, _, _ = quota_limit_objects(self.mnode,
                                        self.volname,
                                        path=path,
                                        limit="10")
        self.assertEqual(ret, 0,
                         ("Failed to set quota limit object "
                          "on path %s of the volume %s", path, self.volname))
        g.log.info(
            "Successfully set the Quota limit object on %s of the "
            "volume %s", path, self.volname)

        cmd = ("touch %s/dir/file{1..5}" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "file creation failed")

        # Bring brick3 offline
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info('Bringing brick %s offline', bricks_list[2])
        ret = bring_bricks_offline(self.volname, bricks_list[2])
        self.assertTrue(ret,
                        'Failed to bring brick %s offline' % bricks_list[2])

        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[2]])
        self.assertTrue(ret, 'Brick %s is not offline' % bricks_list[2])
        g.log.info('Bringing brick %s offline was successful', bricks_list[2])

        # Create files until quota object limit
        cmd = ("touch %s/dir/file{6..9}" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "file creation failed")

        # The next create must fail
        cmd = ("touch %s/dir/file10" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(
            ret, 1, ("Creation of %s/dir/file10 succeeded while "
                     "it was not supposed to." % self.mounts[0].mountpoint))
        g.log.info(
            "Creation of %s/dir/file10 failed as expected due to "
            "quota object limit.", self.mounts[0].mountpoint)

        # Delete one file and re-try the create to succeed.
        cmd = ("rm %s/dir/file1" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "File deletion failed")
        cmd = ("touch %s/dir/file10" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "File creation failed")

        # Bring brick3 online and check status
        g.log.info('Bringing brick %s online...', bricks_list[2])
        ret = bring_bricks_online(self.mnode, self.volname, [bricks_list[2]])
        self.assertTrue(ret,
                        'Failed to bring brick %s online' % bricks_list[2])
        g.log.info('Bringing brick %s online is successful', bricks_list[2])

        g.log.info("Verifying if brick3 is online....")
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, ("brick3 did not come up"))
        g.log.info("brick3 has come online.")

        # Trigger heal
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Starting heal failed')
        g.log.info('Index heal launched')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Verify that file10 did not get recreated on the down brick by an
        # accidental conservative merge.
        for brick in bricks_list:
            node, brick_path = brick.split(':')
            ret, _, _ = g.run(node, 'stat %s/dir/file10' % brick_path)
            self.assertFalse(ret, 'File present!')
def create_nfs_passwordless_ssh(mnode, gnodes, guser='root'):
    """
    Enable key-based SSH authentication without password on all the HA nodes

    Args:
        mnode(str): Hostname of ganesha maintenance node.
        gnodes(list): Hostname of all ganesha nodes including maintenance node
        guser(str): User for setting password less ssh
    Returns:
        True(bool): On success
        False(bool): On failure
    """
    loc = "/var/lib/glusterd/nfs/"
    mconn_inst = random.randint(20, 100)
    mconn = g.rpyc_get_connection(host=mnode, instance=mconn_inst)

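    # If mnode has no /root/.ssh/id_rsa key pair, generate a dedicated
    # secret.pem pair under loc; otherwise reuse the existing id_rsa pair by
    # copying it to secret.pem / secret.pem.pub.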
    if not mconn.modules.os.path.isfile('/root/.ssh/id_rsa'):
        # Generate key on mnode if not already present
        if not mconn.modules.os.path.isfile('%s/secret.pem' % loc):
            ret, _, _ = g.run(
                mnode, "ssh-keygen -f %s/secret.pem -q -N ''" % loc)
            if ret != 0:
                g.log.error("Failed to generate the secret pem file")
                return False
            g.log.info("Key generated on %s" % mnode)
    else:
        mconn.modules.shutil.copyfile("/root/.ssh/id_rsa",
                                      "%s/secret.pem" % loc)
        g.log.info("Copying the id_rsa.pub to secret.pem.pub")
        mconn.modules.shutil.copyfile("/root/.ssh/id_rsa.pub",
                                      "%s/secret.pem.pub" % loc)

    # Create password less ssh from mnode to all ganesha nodes
    for gnode in gnodes:
        gconn_inst = random.randint(20, 100)
        gconn = g.rpyc_get_connection(gnode, user=guser, instance=gconn_inst)
        try:
            glocal = gconn.modules.os.path.expanduser('~')
            gfhand = gconn.builtin.open("%s/.ssh/authorized_keys" % glocal,
                                        "a")
            with mconn.builtin.open("/root/.ssh/id_rsa.pub", 'r') as fhand:
                for line in fhand:
                    gfhand.write(line)
            gfhand.close()
        except Exception as exep:
            g.log.error("Exception occurred while trying to establish "
                        "password less ssh from %s@%s to %s@%s. Exception: %s"
                        % ('root', mnode, guser, gnode, exep))
            return False
        finally:
            g.rpyc_close_connection(
                host=gnode, user=guser, instance=gconn_inst)

    g.rpyc_close_connection(host=mnode, instance=mconn_inst)

    # Copy the ssh key pair from mnode to all the nodes in the Ganesha-HA
    # cluster
    g.log.info("Copy the ssh key pair from %s to other nodes in the "
               "Ganesha-HA cluster" % mnode)
    for gnode in gnodes:
        # Add ganesha nodes to known_hosts
        g.run(mnode, "ssh-keyscan -H %s  >> ~/.ssh/known_hosts" % gnode)
        if gnode != mnode:
            cmd = ("scp -i %s/secret.pem %s/secret.* %s@%s:%s/"
                   % (loc, loc, guser, gnode, loc))
            ret, _, _ = g.run(mnode, cmd)
            if ret != 0:
                g.log.error("Failed to copy the ssh key pair from %s to %s",
                            mnode, gnode)
                return False
    return True
    def test_quorum_messages_in_syslog_with_more_volumes(self):
        """
        Create two volumes
        Set server quorum on both the volumes
        Set server quorum ratio to 91%
        Stop the glusterd service on any one of the nodes
        A quorum state-change message with message id 106002 should be
        recorded for both the volumes in /var/log/messages and
        /var/log/glusterfs/glusterd.log
        Start the glusterd service on the same node
        A quorum regain message with message id 106003 should be recorded
        for both the volumes in /var/log/messages and
        /var/log/glusterfs/glusterd.log
        """
        # pylint: disable=too-many-locals
        # pylint: disable=too-many-statements

        self.log_messages = "/var/log/messages"
        self.log_glusterd = "/var/log/glusterfs/glusterd.log"

        # Enabling server quorum all volumes
        self.quorum_options = {'cluster.server-quorum-type': 'server'}
        for volume in self.volume_list:
            ret = set_volume_options(self.mnode, volume, self.quorum_options)
            self.assertTrue(
                ret, "gluster volume set %s cluster.server"
                "-quorum-type server Failed" % self.volname)
            g.log.info(
                "gluster volume set %s cluster.server-quorum"
                "-type server enabled successfully", self.volname)

        # Setting Quorum ratio in percentage
        self.quorum_percent = {'cluster.server-quorum-ratio': '91%'}
        ret = set_volume_options(self.mnode, 'all', self.quorum_percent)
        self.assertTrue(
            ret, "gluster volume set all cluster.server-quorum-"
            "ratio percentage Failed :%s" % self.servers)
        g.log.info(
            "gluster volume set all cluster.server-quorum-ratio 91 "
            "percentage enabled successfully :%s", self.servers)

        # counting quorum regain messages-id '106002' in  /var/log/messages
        # file, before glusterd services stop
        cmd_messages = ' '.join(
            ['grep -o', '106002', self.log_messages, '| wc -l'])
        ret, before_glusterd_stop_msgid_count, _ = g.run(
            self.mnode, cmd_messages)
        self.assertEqual(
            ret, 0, "Failed to grep quorum regain message-id "
            "106002 count in : %s" % self.log_messages)

        # counting quorum regain messages-id '106002' in
        # /var/log/glusterfs/glusterd.log file, before glusterd services stop
        cmd_glusterd = ' '.join(
            ['grep -o', '106002', self.log_glusterd, '| wc -l'])
        ret, before_glusterd_stop_glusterd_id_count, _ = g.run(
            self.mnode, cmd_glusterd)
        self.assertEqual(
            ret, 0, "Failed to grep quorum regain message-id "
            "106002 count in :%s" % self.log_glusterd)

        # Stopping glusterd services
        ret = stop_glusterd(self.servers[1])
        self.glusterd_service = False
        self.assertTrue(ret,
                        "Failed stop glusterd services : %s" % self.servers[1])
        g.log.info("Stopped glusterd services successfully on: %s",
                   self.servers[1])

        # checking glusterd service stopped or not
        ret = is_glusterd_running(self.servers[1])
        self.assertEqual(ret, 1, "glusterd service should be stopped")

        # counting quorum regain messages-id '106002' in /var/log/messages file
        # after glusterd services stop.
        count = 0
        msg_count = False
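        # One quorum message is logged per volume and two volumes are
        # configured, so the message-id count should grow by exactly 2.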
        expected_msg_id_count = int(before_glusterd_stop_msgid_count) + 2
        while count <= 10:
            ret, after_glusterd_stop_msgid_count, _ = g.run(
                self.mnode, cmd_messages)
            if (re.search(r'\b' + str(expected_msg_id_count) + r'\b',
                          after_glusterd_stop_msgid_count)):
                msg_count = True
                break
            sleep(5)
            count += 1
        self.assertTrue(
            msg_count, "Failed to grep quorum regain message-id "
            "106002 count in :%s" % self.log_messages)

        # counting quorum regain messages-id '106002' in
        # /var/log/glusterfs/glusterd.log file after glusterd services stop
        ret, after_glusterd_stop_glusterd_id_count, _ = g.run(
            self.mnode, cmd_glusterd)
        self.assertEqual(
            ret, 0, "Failed to grep quorum regain message-id "
            "106002 count in :%s" % self.log_glusterd)

        # Finding quorum regain message-id count difference between before
        # and after glusterd services stop in /var/log/messages
        count_diff = (int(after_glusterd_stop_msgid_count) -
                      int(before_glusterd_stop_msgid_count))

        self.assertEqual(
            count_diff, 2, "Failed to record regain messages "
            "in : %s" % self.log_messages)
        g.log.info(
            "regain messages recorded for two volumes "
            "successfully after glusterd services stop "
            ":%s", self.log_messages)

        # Finding quorum regain message-id  count difference between before
        # and after glusterd services stop in /var/log/glusterfs/glusterd.log
        count_diff = (int(after_glusterd_stop_glusterd_id_count) -
                      int(before_glusterd_stop_glusterd_id_count))
        self.assertEqual(
            count_diff, 2, "Failed to record regain messages in "
            ": %s" % self.log_glusterd)
        g.log.info(
            "regain messages recorded for two volumes successfully "
            "after glusterd services stop :%s", self.log_glusterd)

        # counting quorum messages-id '106003' in a /var/log/messages file
        # before glusterd services start
        cmd_messages = ' '.join(
            ['grep -o', '106003', self.log_messages, '| wc -l'])
        ret, before_glusterd_start_msgid_count, _ = g.run(
            self.mnode, cmd_messages)
        self.assertEqual(
            ret, 0, "Failed to grep quorum message-id 106003 "
            "count in :%s" % self.log_messages)

        # counting quorum regain messages-id '106003' in
        # /var/log/glusterfs/glusterd.log file before glusterd services start
        cmd_glusterd = ' '.join(
            ['grep -o', '106003', self.log_glusterd, '| wc -l'])
        ret, before_glusterd_start_glusterd_id_count, _ = g.run(
            self.mnode, cmd_glusterd)
        self.assertEqual(
            ret, 0, "Failed to grep quorum regain message-id "
            "106003 count in :%s" % self.log_glusterd)

        # Starting glusterd services
        ret = start_glusterd(self.servers[1])
        self.glusterd_service = True
        self.assertTrue(
            ret, "Failed to start glusterd "
            "services: %s" % self.servers[1])

        # Checking glusterd service running or not
        ret = is_glusterd_running(self.servers[1])
        self.assertEqual(ret, 0, "glusterd service should be running")

        # counting quorum message-id '106003' in the /var/log/messages
        # file after glusterd services start
        count = 0
        expected_msg_id_count = int(before_glusterd_start_msgid_count) + 2
        msg_count = False
        while count <= 10:
            ret, after_glusterd_start_msgid_count, _ = g.run(
                self.mnode, cmd_messages)
            if (re.search(r'\b' + str(expected_msg_id_count) + r'\b',
                          after_glusterd_start_msgid_count)):
                msg_count = True
                break
            sleep(5)
            count += 1

        self.assertTrue(
            msg_count, "Failed to grep quorum message-id 106003 "
            "count in :%s" % self.log_messages)

        # counting quorum regain messages-id '106003' in
        # /var/log/glusterfs/glusterd.log file after glusterd services start
        ret, after_glusterd_start_glusterd_id_count, _ = g.run(
            self.mnode, cmd_glusterd)
        self.assertEqual(
            ret, 0, "Failed to grep quorum regain message-id "
            "106003 count in :%s" % self.log_glusterd)

        # Finding quorum regain message-id count difference between before
        # and after glusterd services start in /var/log/messages
        count_diff = (int(after_glusterd_start_msgid_count) -
                      int(before_glusterd_start_msgid_count))
        self.assertEqual(
            count_diff, 2, "Failed to record regain "
            "messages in :%s" % self.log_messages)
        g.log.info(
            "regain messages recorded for two volumes successfully "
            "after glusterd services start in :%s", self.log_messages)
        # Finding quorum regain message-id count difference between before
        # and after glusterd services start in /var/log/glusterfs/glusterd.log
        count_diff = (int(after_glusterd_start_glusterd_id_count) -
                      int(before_glusterd_start_glusterd_id_count))
        self.assertEqual(
            count_diff, 2, "Failed to record regain messages "
            "in : %s" % self.log_glusterd)
        g.log.info(
            "regain messages recorded for two volumes successfully "
            "after glusterd services start :%s", self.log_glusterd)
def enable_pvc_resize(master_node):
    '''
     This function edits the /etc/origin/master/master-config.yaml
     file to enable the pv_resize feature and restarts the
     atomic-openshift services on the master node.
     Args:
         master_node (str): hostname of the master node on which the
                            master-config.yaml file should be edited
     Returns:
         bool: True if successful,
               otherwise raises an Exception
    version = get_openshift_version()
    if version < "3.9":
        msg = ("pv resize is not available in openshift "
               "version %s " % version)
        g.log.error(msg)
        raise NotSupportedException(msg)

    try:
        conn = g.rpyc_get_connection(master_node, user="******")
        if conn is None:
            err_msg = ("Failed to get rpyc connection of node %s"
                       % master_node)
            g.log.error(err_msg)
            raise ExecutionError(err_msg)

        with conn.builtin.open(MASTER_CONFIG_FILEPATH, 'r') as f:
            data = yaml.load(f)
            dict_add = data['admissionConfig']['pluginConfig']
            if "PersistentVolumeClaimResize" in dict_add:
                g.log.info("master-config.yaml file is already edited")
                return True
            dict_add['PersistentVolumeClaimResize'] = {
                'configuration': {
                    'apiVersion': 'v1',
                    'disable': 'false',
                    'kind': 'DefaultAdmissionConfig'}}
            data['admissionConfig']['pluginConfig'] = dict_add
            kube_config = data['kubernetesMasterConfig']
            for key in ('apiServerArguments', 'controllerArguments'):
                kube_config[key] = (
                    kube_config.get(key)
                    if isinstance(kube_config.get(key), dict) else {})
                value = ['ExpandPersistentVolumes=true']
                kube_config[key]['feature-gates'] = value
        with conn.builtin.open(MASTER_CONFIG_FILEPATH, 'w+') as f:
            yaml.dump(data, f, default_flow_style=False)
    except Exception as err:
        raise ExecutionError("failed to edit master-config.yaml file "
                             "%s on %s" % (err, master_node))
    finally:
        g.rpyc_close_connection(master_node, user="******")

    g.log.info("successfully edited master-config.yaml file "
               "%s" % master_node)
    if version == "3.9":
        cmd = ("systemctl restart atomic-openshift-master-api "
               "atomic-openshift-master-controllers")
    else:
        cmd = ("/usr/local/bin/master-restart api && "
               "/usr/local/bin/master-restart controllers")
    ret, out, err = g.run(master_node, cmd, "root")
    if ret != 0:
        err_msg = "Failed to execute cmd %s on %s\nout: %s\nerr: %s" % (
            cmd, master_node, out, err)
        g.log.error(err_msg)
        raise ExecutionError(err_msg)

    # Wait for API service to be ready after the restart
    for w in waiter.Waiter(timeout=120, interval=1):
        try:
            cmd_run("oc get nodes", master_node)
            return True
        except AssertionError:
            continue
    err_msg = "Exceeded 120s timeout waiting for OCP API to start responding."
    g.log.error(err_msg)
    raise ExecutionError(err_msg)
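A minimal usage sketch for the helper above; it reuses the cmd_run() wrapper
shown earlier, and the master hostname and PVC patch below are placeholders,
not values from the source:

# Hypothetical caller: enable the feature, then grow an existing claim by
# patching its requested storage size.
master = "ocp-master.example.com"
enable_pvc_resize(master)
cmd_run("oc patch pvc claim1 -p "
        "'{\"spec\": {\"resources\": {\"requests\": "
        "{\"storage\": \"2Gi\"}}}}'", master)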
Beispiel #43
0
    def test_open_file_migration(self):
        """
        Description: Checks that files with open fd are migrated successfully.

        Steps :
        1) Create a volume.
        2) Mount the volume using FUSE.
        3) Create files on volume mount.
        4) Open fd for the files and keep on doing read write operations on
           these files.
        5) While fds are open, add bricks to the volume and trigger rebalance.
        6) Wait for rebalance to complete.
        7) Wait for write on open fd to complete.
        8) Check for any data loss during rebalance.
        9) Check if rebalance has any failures.
        """
        # Create files and open fd for the files on mount point
        m_point = self.mounts[0].mountpoint
        cmd = ('cd {}; for i in `seq 261 1261`;do touch testfile$i;'
               'done'.format(m_point))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to create files")
        g.log.info("Successfully created files")
        proc = open_file_fd(m_point,
                            2,
                            self.clients[0],
                            start_range=301,
                            end_range=400)

        # Calculate file count for the mount-point
        cmd = ("ls -lR {}/testfile* | wc -l".format(m_point))
        ret, count_before, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to get file count")
        g.log.info("File count before rebalance is:%s", count_before)

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Trigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start rebalance")
        g.log.info("Rebalance is started")

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=300)
        self.assertTrue(ret, ("Rebalance failed on volume %s", self.volname))
        g.log.info("Rebalance is successful on " "volume %s", self.volname)

        # Close connection and check if write on open fd has completed
        ret, _, _ = proc.async_communicate()
        self.assertEqual(ret, 0, "Write on open fd" " has not completed yet")
        g.log.info("Write completed on open fd")

        # Calculate file count for the mount-point
        cmd = ("ls -lR {}/testfile* | wc -l".format(m_point))
        ret, count_after, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to get file count")
        g.log.info("File count after rebalance is:%s", count_after)

        # Check if there is any data loss
        self.assertEqual(
            int(count_before), int(count_after),
            "The file count before and after"
            " rebalance is not same."
            " There is data loss.")
        g.log.info("The file count before and after rebalance is same."
                   " No data loss occurred.")

        # Check if rebalance has any failures
        ret = get_rebalance_status(self.mnode, self.volname)
        no_of_failures = ret['aggregate']['failures']
        self.assertEqual(int(no_of_failures), 0, "Failures in rebalance")
        g.log.info("No failures in rebalance")
    def test_restart_glusterd_after_rebalance(self):

        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, "Failed to log volume info and status of "
                        "volume %s" % self.volname)
        g.log.info(
            "Successful in logging volume info and status of "
            "volume %s", self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand success", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode,
                                                   self.volname,
                                                   timeout=600)
        self.assertTrue(ret, ("Volume %s: one or more volume process are "
                              "not up", self.volname))
        g.log.info("All volume %s processes are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, "Failed to log volume info and status of "
                        "volume %s" % self.volname)

        # Start Rebalance
        g.log.info("Starting rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         ("Failed to start rebalance on %s ", self.volname))
        g.log.info("Successfully started rebalance on %s ", self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # restart glusterd on all servers
        g.log.info("Restart glusterd on all servers %s", self.servers)
        ret = restart_glusterd(self.servers)
        self.assertTrue(
            ret,
            ("Failed to restart glusterd on all servers %s", self.servers))
        g.log.info("Successfully restarted glusterd on all servers %s",
                   self.servers)

        # Check if glusterd is running on all servers(expected: active)
        g.log.info(
            "Check if glusterd is running on all servers %s"
            "(expected: active)", self.servers)
        ret = is_glusterd_running(self.servers)
        self.assertEqual(
            ret, 0,
            ("Glusterd is not running on all servers %s", self.servers))
        g.log.info("Glusterd is running on all the servers %s", self.servers)

        # Check if rebalance process has started after glusterd restart
        g.log.info("Checking if rebalance process has started after "
                   "glusterd restart")
        for server in self.servers:
            ret, _, _ = g.run(server, "pgrep rebalance")
            self.assertNotEqual(ret, 0, ("Rebalance process is triggered on "
                                         "%s after glusterd restart", server))
            g.log.info(
                "Rebalance is NOT triggered on %s after glusterd "
                "restart", server)
    def test_negative_test(self):
        """Testing an expected failure as negative test"""
        print "Running: %s - %s" % (self.id(), self.shortDescription())
        rcode, _, _ = g.run(self.masternode, "false")
        self.assertEqual(rcode, 1)
    def test_split_brain(self):
        """
        Description: Create split-brain on files and check if IO's fail
        - Disable self-heal and cluster-quorum-type
        - Get the bricks from the volume
        - Write IO and validate IO
        - Bring 1st set of brick offline(1 Data brick and arbiter brick)
        - Write IO and validate IO
        - Bring 2nd set of bricks offline(1 Data brick and arbiter brick)
        - Write IO and validate IO
        - Check volume is in split-brain
        - Write IO and validate IO - should fail
        - Enable self-heal and cluster-quorum-type
        - Write IO and validate IO - should fail
        """
        # Disable self-heal and cluster-quorum-type
        options = {"self-heal-daemon": "off", "cluster.quorum-type": "none"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))

        # Get the bricks from the volume
        sub_vols = get_subvols(self.mnode, self.volname)
        self.bricks_to_bring_offline = list(sub_vols['volume_subvols'][0])

        # Write IO's
        write_cmd = ("/usr/bin/env python %s create_files -f 1 "
                     "--base-file-name test_file --fixed-file-size 1k %s" %
                     (self.script_upload_path, self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, write_cmd)
        self.assertEqual(ret, 0, "Failed to create files on %s"
                         % self.mounts[0].mountpoint)

        # Bring each set of bricks offline (1 data brick and the arbiter
        # brick), write IO and validate it
        for bricks in ((0, -1), (1, -1)):
            down_bricks = []
            for brick in bricks:
                down_bricks.append(self.bricks_to_bring_offline[brick])
            ret = bring_bricks_offline(self.volname, down_bricks)
            self.assertTrue(
                ret, 'Failed to bring bricks {} offline'.format(down_bricks))
            proc = g.run_async(self.mounts[0].client_system, write_cmd)

            # Validate I/O
            self.assertTrue(validate_io_procs([proc], self.mounts),
                            "IO failed on some of the clients")

            # Bring bricks online
            self._bring_bricks_online()

        # Check volume is in split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertTrue(ret, "unable to create split-brain scenario")
        g.log.info("Successfully created split brain scenario")

        # Write IO's
        proc2 = g.run_async(self.mounts[0].client_system, write_cmd)

        # Validate I/O
        self.assertFalse(validate_io_procs([proc2], self.mounts),
                         "IO passed on split-brain")
        g.log.info("Expected - IO's failed due to split-brain")

        # Enable self-heal and cluster-quorum-type
        options = {"self-heal-daemon": "on", "cluster.quorum-type": "auto"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))

        # Write IO's
        proc3 = g.run_async(self.mounts[0].client_system, write_cmd)

        # Validate I/O
        self.assertFalse(validate_io_procs([proc3], self.mounts),
                         "IO passed on split-brain")
        g.log.info("Expected - IO's failed due to split-brain")
Beispiel #47
0
    def test_ec_truncate_file_with_brick_down(self):
        """
        Test steps:
        1. Create a volume, start and mount it on a client
        2. Bring down redundant bricks in the subvol
        3. Create a file on the volume using "touch"
        4. Truncate the file using "O_TRUNC"
        5. Bring the brick online
        6. Write data on the file and wait for heal completion
        7. Check for crashes and coredumps
        """
        # pylint: disable=unsubscriptable-object
        for restart_type in ("volume_start", "node_reboot"):
            # Time stamp from mnode for checking cores at the end of test
            ret, test_timestamp, _ = g.run(self.mnode, "date +%s")
            self.assertEqual(ret, 0, "date command failed")
            test_timestamp = test_timestamp.strip()

            # Create a file using touch
            file_name = self.mounts[0].mountpoint + "/test_1"
            ret, _, err = g.run(self.mounts[0].client_system,
                                "touch {}".format(file_name))
            self.assertEqual(ret, 0, "File creation failed")
            g.log.info("File Created successfully")

            # List two bricks in each subvol
            subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
            bricks_to_bring_offline = []
            for subvol in subvols:
                self.assertTrue(subvol, "List is empty")
                bricks_to_bring_offline.extend(sample(subvol, 2))

            # Bring two bricks of each subvol offline
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(ret, "Bricks are still online")

            # Validating the bricks are offline or not
            ret = are_bricks_offline(self.mnode, self.volname,
                                     bricks_to_bring_offline)
            self.assertTrue(
                ret, "Few of the bricks are still online in"
                " {} in".format(bricks_to_bring_offline))

            # Truncate the file
            cmd = (
                'python -c "import os, sys; fd = os.open(\'{}\', os.O_TRUNC )'
                '; os.close( fd )"').format(file_name)
            ret, _, err = g.run(self.mounts[0].client_system, cmd)
            self.assertEqual(ret, 0, err)
            g.log.info("File truncated successfully")

            # Bring back the bricks online
            if restart_type == "volume_start":
                # Bring back bricks online by volume start
                ret, _, err = volume_start(self.mnode,
                                           self.volname,
                                           force=True)
                self.assertEqual(ret, 0, err)
                g.log.info("All bricks are online")
            elif restart_type == "node_reboot":
                # Bring back the bricks online by node restart
                for brick in bricks_to_bring_offline:
                    node_to_reboot = brick.split(":")[0]
                    ret = reboot_nodes_and_wait_to_come_online(node_to_reboot)
                    self.assertTrue(
                        ret, "Reboot Failed on node: "
                        "{}".format(node_to_reboot))
                    g.log.info("Node: %s rebooted successfully",
                               node_to_reboot)
                    time.sleep(60)

            # Check whether bricks are online or not
            ret = are_bricks_online(self.mnode, self.volname,
                                    bricks_to_bring_offline)
            self.assertTrue(
                ret,
                "Bricks {} are still offline".format(bricks_to_bring_offline))

            # write data to the file
            cmd = ('python -c "import os, sys;fd = os.open(\'{}\', '
                   'os.O_RDWR) ;'
                   'os.write(fd, \'This is test after truncate\'.encode());'
                   ' os.close(fd)"').format(file_name)

            ret, _, err = g.run(self.mounts[0].client_system, cmd)
            self.assertEqual(ret, 0, err)
            g.log.info("Data written successfully on to the file")

            # Monitor heal completion
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret, "Heal pending for file {}".format(file_name))

            # check for any crashes on servers and client
            for nodes in (self.servers, [self.clients[0]]):
                ret = is_core_file_created(nodes, test_timestamp)
                self.assertTrue(ret,
                                "Cores found on the {} nodes".format(nodes))
Beispiel #48
0
    def test_client_side_quorum_auto_local_to_volume_not_cluster(self):
        """
        - create five volumes as below
            vol1->2x2
            vol2->2x2
            vol3->2x3
            vol4->2x3
            vol5->a pure distribute volume
        - do IO to all vols
        - set client side quorum to auto for vol1 and vol3
        - get the client side quorum value for all vols and check for result
        - bring down b0 on vol1 and b0 and b1 on vol3
        - try to create files on all vols and check for result
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Creating files for all volumes
        for mount_point in self.mount_points:
            self.all_mounts_procs = []
            g.log.info('Creating files...')
            command = ("python %s create_files -f 50 "
                       "--fixed-file-size 1k %s" %
                       (self.script_upload_path, mount_point))

            proc = g.run_async(self.mounts[0].client_system, command)
            self.all_mounts_procs.append(proc)
            self.io_validation_complete = False

            # Validate IO
            self.assertTrue(
                validate_io_procs(self.all_mounts_procs, self.mounts),
                "IO failed on some of the clients")
            self.io_validation_complete = True

        volumes_to_change_options = ['1', '3']
        # set cluster.quorum-type to auto
        for vol_number in volumes_to_change_options:
            vol_name = ('testvol_distributed-replicated_%s' % vol_number)
            options = {"cluster.quorum-type": "auto"}
            g.log.info(
                "setting cluster.quorum-type to auto on "
                "volume testvol_distributed-replicated_%s", vol_number)
            ret = set_volume_options(self.mnode, vol_name, options)
            self.assertTrue(ret, ("Unable to set volume option %s for "
                                  "volume %s" % (options, vol_name)))
            g.log.info("Successfully set %s for volume %s", options, vol_name)

        # check if options are set correctly
        volume_list = get_volume_list(self.mnode)
        for volume in volume_list:
            g.log.info('Checking for cluster.quorum-type option for %s',
                       volume)
            volume_options_dict = get_volume_options(self.mnode, volume,
                                                     'cluster.quorum-type')
            if (volume == 'testvol_distributed-replicated_1'
                    or volume == 'testvol_distributed-replicated_3'
                    or volume == 'testvol_distributed-replicated_4'):
                self.assertEqual(
                    volume_options_dict['cluster.quorum-type'], 'auto',
                    'Option cluster.quorum-type '
                    'is not AUTO for %s' % volume)
                g.log.info('Option cluster.quorum-type is AUTO for %s', volume)
            else:
                self.assertEqual(
                    volume_options_dict['cluster.quorum-type'], 'none',
                    'Option cluster.quorum-type '
                    'is not NONE for %s' % volume)
                g.log.info('Option cluster.quorum-type is NONE for %s', volume)

        # Get first brick server and brick path
        # and get first file from filelist then delete it from volume
        vols_file_list = {}
        for volume in volume_list:
            brick_list = get_all_bricks(self.mnode, volume)
            brick_server, brick_path = brick_list[0].split(':')
            ret, file_list, _ = g.run(brick_server, 'ls %s' % brick_path)
            self.assertFalse(ret, 'Failed to ls files on %s' % brick_server)
            file_from_vol = file_list.splitlines()[0]
            ret, _, _ = g.run(brick_server,
                              'rm -rf %s/%s' % (brick_path, file_from_vol))
            self.assertFalse(ret, 'Failed to rm file on %s' % brick_server)
            vols_file_list[volume] = file_from_vol

        # bring bricks offline
        # bring first brick for testvol_distributed-replicated_1
        volname = 'testvol_distributed-replicated_1'
        brick_list = get_all_bricks(self.mnode, volname)
        bricks_to_bring_offline = brick_list[0:1]
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, volname, bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # bring first two bricks for testvol_distributed-replicated_3
        volname = 'testvol_distributed-replicated_3'
        brick_list = get_all_bricks(self.mnode, volname)
        bricks_to_bring_offline = brick_list[0:2]
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, volname, bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # merge two dicts (volname: file_to_delete) and (volname: mountpoint)
        temp_dict = [vols_file_list, self.mount_points_and_volnames]
        file_to_delete_to_mountpoint_dict = {}
        for k in vols_file_list.iterkeys():
            file_to_delete_to_mountpoint_dict[k] = tuple(
                source_dict[k] for source_dict in temp_dict)

        # create files on all volumes and check for result
        for volname, file_and_mountpoint in \
                file_to_delete_to_mountpoint_dict.iteritems():
            filename, mountpoint = file_and_mountpoint

            # check for ROFS error for read-only file system for
            # testvol_distributed-replicated_1 and
            # testvol_distributed-replicated_3
            if (volname == 'testvol_distributed-replicated_1'
                    or volname == 'testvol_distributed-replicated_3'):
                # create new file taken from vols_file_list
                g.log.info("Start creating new file on all mounts...")
                all_mounts_procs = []
                cmd = ("touch %s/%s" % (mountpoint, filename))

                proc = g.run_async(self.client, cmd)
                all_mounts_procs.append(proc)

                # Validate IO
                g.log.info("Validating if IO failed with read-only filesystem")
                ret, _ = is_io_procs_fail_with_rofs(self, all_mounts_procs,
                                                    self.mounts)
                self.assertTrue(ret, ("Unexpected error and IO successful"
                                      " on read-only filesystem"))
                g.log.info("EXPECTED: "
                           "Read-only file system in IO while creating file")

            # check for no errors for all the rest volumes
            else:
                # create new file taken from vols_file_list
                g.log.info("Start creating new file on all mounts...")
                all_mounts_procs = []
                cmd = ("touch %s/%s" % (mountpoint, filename))

                proc = g.run_async(self.client, cmd)
                all_mounts_procs.append(proc)

                # Validate IO
                self.assertTrue(
                    validate_io_procs(all_mounts_procs, self.mounts),
                    "IO failed on some of the clients")
def is_nfs_ganesha_cluster_in_healthy_state(mnode):
    """
       Checks whether nfs ganesha cluster is in healthy state.

    Args:
        mnode (str): Node in which cmd command will
            be executed.

    Returns:
        bool : True if nfs ganesha cluster is in healthy state.
            False otherwise

    Example:
        is_nfs_ganesha_cluster_in_healthy_state(mnode)
    """

    cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --status " +
           "/run/gluster/shared_storage/nfs-ganesha/ | grep " +
           " 'Cluster HA Status' | cut -d ' ' -f 4 ")

    retcode, stdout, _ = g.run(mnode, cmd)
    if retcode != 0:
        g.log.error("Failed to execute nfs-ganesha status command to check "
                    "if cluster is in healthy state")
        return False

    if stdout.strip('\n') != "HEALTHY":
        g.log.error("nfs-ganesha cluster is not in healthy state. Current "
                    "cluster state: %s " % stdout)
        return False

    cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --status " +
           "/run/gluster/shared_storage/nfs-ganesha/ | grep -v" +
           " 'Online' | grep -v 'Cluster' | cut -d ' ' -f 1 | " +
           "sed s/'-cluster_ip-1'//g")

    retcode, stdout, _ = g.run(mnode, cmd)
    if retcode != 0:
        g.log.error("Failed to execute nfs-ganesha status command to parse "
                    "for the cluster resources")
        return False

    cluster_list = stdout.split("\n")
    cluster_list = list(filter(None, cluster_list))

    cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --status " +
           "/run/gluster/shared_storage/nfs-ganesha/ | grep -v" +
           " 'Online' | grep -v 'Cluster' | cut -d ' ' -f 1 | " +
           "sed s/'-cluster_ip-1'//g")

    retcode, stdout, _ = g.run(mnode, cmd)
    if retcode != 0:
        g.log.error("Failed to execute nfs-ganesha status command to parse "
                    "for the hostnames in cluster")
        return False

    host_list = stdout.split("\n")
    host_list = list(filter(None, host_list))

    if (cluster_list != []) and (cluster_list == host_list):
        g.log.info("nfs ganesha cluster is in HEALTHY state")
        return True

    g.log.error("nfs ganesha cluster is not in HEALTHY state")
    return False
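A short usage sketch for the health check above; the node name is a
placeholder:

# Hypothetical caller: bail out early when the nfs-ganesha HA cluster is
# degraded.
if not is_nfs_ganesha_cluster_in_healthy_state("gluster-node1.example.com"):
    g.log.error("nfs-ganesha cluster is degraded; aborting test setup")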
    def test_volume_status_fd(self):

        '''
        -> Create volume
        -> Mount the volume on 2 clients
        -> Run I/O's on mountpoint
        -> While I/O's are in progress
        -> Perform gluster volume status fd repeatedly
        -> List all files and dirs created
        '''

        # checking volume mounted or not
        for mount_obj in self.mounts:
            ret = is_mounted(self.volname, mount_obj.mountpoint, self.mnode,
                             mount_obj.client_system, self.mount_type)
            self.assertTrue(ret, "Not mounted on %s"
                            % mount_obj.client_system)
            g.log.info("Mounted on %s", mount_obj.client_system)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 15 %s" % (self.script_upload_path,
                                             self.counter,
                                             mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10
        self.io_validation_complete = False

        # performing  "gluster volume status volname fd" command on
        # all cluster servers randomly while io is in progress,
        # this command should not get hang while io is in progress
        count = 0
        while count < 300:
            ret, _, _ = g.run(random.choice(self.servers),
                              "gluster volume status %s fd" % self.volname)
            self.assertEqual(ret, 0, ("Volume status 'fd' failed on volume %s"
                                      % self.volname))
            g.log.info("Volume status fd is successful for %s", self.volname)
            count += 1

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )
        self.io_validation_complete = True

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")