def test_local_module_on_remote(self):
    """Testing local module definition on remote system"""
    conn = g.rpyc_get_connection(self.masternode)
    import supporting_files.rpyc.local_module
    remote_module = g.rpyc_define_module(conn,
                                         supporting_files.rpyc.local_module)

    # module-level variable is visible on the remote side
    self.assertEqual(remote_module.myvariable, 'yada yada yada')

    # class attribute is visible on the remote side
    self.assertEqual(remote_module.myclass.myclassattribute,
                     'yada yada yada')

    # static, class, and instance methods all execute remotely
    self.assertIn('static:', remote_module.myclass.static_method())
    self.assertIn('class:', remote_module.myclass.class_method())
    instance = remote_module.myclass()
    self.assertIn('instance:', instance.instance_method())
def cmd_run(cmd, hostname, raise_on_error=True):
    """Glusto's command runner wrapper.

    Args:
        cmd (str): Shell command to run on the specified hostname.
        hostname (str): hostname where Glusto should run specified command.
        raise_on_error (bool): defines whether we should raise exception
                               in case command execution failed.
    Returns:
        str: Stripped shell command's stdout value if not None.
    """
    ret, out, err = g.run(hostname, cmd, "root")

    # A dropped SSH session or a TLS handshake timeout is transient:
    # drop the cached connection and retry the command exactly once.
    lowered_err = err.lower()
    if ("no ssh connection" in lowered_err
            or "tls handshake timeout" in lowered_err):
        g.ssh_close_connection(hostname)
        ret, out, err = g.run(hostname, cmd, "root")

    msg = ("Failed to execute command '%s' on '%s' node. Got non-zero "
           "return code '%s'. Err: %s" % (cmd, hostname, ret, err))
    if int(ret) != 0:
        g.log.error(msg)
    if raise_on_error:
        assert int(ret) == 0, msg

    return out.strip() if out else out
Beispiel #3
0
    def setUpClass(cls):
        """unittest standard setUpClass method.

        Runs once before all test_ methods in the class.
        """
        print "Setting Up Class: %s" % cls.__name__
        # Target files for the yaml/ini store-and-reload round-trip tests.
        cls.yaml_file = '/tmp/testconfig.yml'
        cls.ini_file = '/tmp/testconfig.ini'
        cls.ini_ordered_file = '/tmp/testconfig_ordered.ini'

        # Build the in-memory config the tests serialize and re-read.
        cls.config = {}
        cls.config['defaults'] = {}
        cls.config['defaults']['this'] = 'yada1'
        cls.config['defaults']['that'] = 'yada2'
        cls.config['globals'] = {}
        cls.config['globals']['the_other'] = 'yada3'
        # to test ini substitution (configparser-style %(key)s interpolation)
        cls.config['defaults']['this_and_that'] = '%(this)s and %(that)s'

        g.show_config(cls.config)

        # Section order used when writing the ordered ini file.
        cls.order = ['defaults', 'globals']

        # cleanup files if they exist
        # NOTE(review): the triple-quote below opens an unterminated string
        # literal in this excerpt -- likely a truncated commented-out block;
        # confirm against the original file.
        '''
    def test_connection(self):
        """Testing rpyc connection"""
        print "Running: %s - %s" % (self.id(), self.shortDescription())

        # Establish (or reuse) an rpyc connection to the master node,
        # then verify it responds to a ping.
        g.rpyc_get_connection(self.masternode)
        pingable = g.rpyc_ping_connection(self.masternode)

        self.assertTrue(pingable, "Connection did not ping.")
 def setUpClass(cls):
     """unittest standard setUpClass method
     Runs before all test_ methods in the class
     """
     print "Setting Up Class: %s" % cls.__name__
     # Merge the example config files and publish them via g.config.
     config = g.load_configs(["../examples/systems.yml",
                              "../examples/glusto.yml"])
     g.update_config(config)
    def setUp(self):
        """unittest standard setUp method
        Runs before each test_ method
        """
        print "Setting Up: %s" % self.id()
        # Reload the example configs before every test so each test sees
        # a fresh g.config regardless of what previous tests changed.
        config = g.load_configs(["../examples/systems.yml",
                                 "../examples/glusto.yml"])
        g.update_config(config)

        # Convenience handles for the first configured node and client.
        self.masternode = g.config["nodes"][0]
        self.client = g.config["clients"][0]
Beispiel #7
0
def handle_configs(config_list):
    """Load default and user-specified configuration files"""
    # Defaults first, so the user-specified files below can override them.
    g.log.info("Loading default configuration files.")
    g.load_config_defaults()

    if not config_list:
        return

    # config_list is a whitespace-separated string of file paths.
    g.log.info("Loading user specified configuration files.")
    config = g.load_configs(config_list.split())
    g.update_config(config)
    def setUp(self):
        """unittest standard setUp method
        Runs before each test_ method
        """
        print "Setting Up: %s" % self.id()
        # render the template using the class-level vars and search path
        g.render_template(self.template_file,
                          self.template_vars,
                          self.output_file,
                          self.search_path)

        # read the resulting config file built from template
        self.output_config = g.load_config(self.output_file)
        g.show_config(self.output_config)
Beispiel #9
0
    def setUpClass(cls):
        """unittest standard setUpClass method
        Runs before all test_ methods in the class
        """
        print "Setting Up Class: %s" % cls.__name__
        # Merge the example configs and publish them via g.config.
        config = g.load_configs(["../examples/systems.yml",
                                 "../examples/glusto.yml"])
        g.update_config(config)

        # Convenience handles: all nodes, the first node, the first client.
        cls.hosts = g.config['nodes']
        cls.primary_host = g.config['nodes'][0]
        cls.client = g.config["clients"][0]

        # Payload echoed back by the remote-command tests.
        cls.test_string = 'Go for the Glusto!'
def validate_multipath_pod(hostname, podname, hacount, mpath=""):
    '''Validate the multipath device state for a given app pod.

     Args:
         hostname (str): ocp master node name
         podname (str): app-pod name for which we need to validate
                        multipath. ex : nginx1
         hacount (int): multipath count or HA count. ex: 3
         mpath (str): multipath device name passed to 'multipath -ll'
                      (empty string lists all devices).
     Returns:
         bool: True if successful,
               otherwise False
    '''
    # Find which node the pod is scheduled on (7th column of 'oc get pods').
    cmd = "oc get pods -o wide | grep %s | awk '{print $7}'" % podname
    ret, out, err = g.run(hostname, cmd, "root")
    if ret != 0 or out == "":
        # Fixed: log the stderr value (was logging stdout) and the
        # 'exectute' typo.
        g.log.error("failed to execute cmd %s on %s, err %s"
                    % (cmd, hostname, err))
        return False
    pod_nodename = out.strip()

    # Exactly one path must be active; the remaining hacount - 1 paths
    # must be enabled (passive).
    active_node_count = 1
    enable_node_count = hacount - 1

    cmd = "multipath -ll %s | grep 'status=active' | wc -l" % mpath
    ret, out, err = g.run(pod_nodename, cmd, "root")
    if ret != 0 or out == "":
        g.log.error("failed to execute cmd %s on %s, err %s"
                    % (cmd, pod_nodename, err))
        return False
    active_count = int(out.strip())
    if active_node_count != active_count:
        g.log.error("active node count on %s for %s is %s and not 1"
                    % (pod_nodename, podname, active_count))
        return False

    cmd = "multipath -ll %s | grep 'status=enabled' | wc -l" % mpath
    ret, out, err = g.run(pod_nodename, cmd, "root")
    if ret != 0 or out == "":
        g.log.error("failed to execute cmd %s on %s, err %s"
                    % (cmd, pod_nodename, err))
        return False
    enable_count = int(out.strip())
    if enable_node_count != enable_count:
        g.log.error("passive node count on %s for %s is %s "
                    "and not %s" % (
                        pod_nodename, podname, enable_count,
                        enable_node_count))
        return False

    g.log.info("validation of multipath for %s is successful"
               % podname)
    return True
Beispiel #11
0
    def test_yaml(self):
        """Testing yaml config file"""
        print "Running: %s - %s" % (self.id(), self.shortDescription())

        # write the config file
        g.store_config(self.config, self.yaml_file)
        # TODO: does unittest have a file exists assert?
        self.assertTrue(os.path.exists(self.yaml_file))

        # read it back and verify the round-trip preserved every value
        config = g.load_config(self.yaml_file)
        g.show_config(config)
        self.assertEqual(config['defaults']['this'], 'yada1')
        self.assertEqual(config['defaults']['that'], 'yada2')
        self.assertEqual(config['globals']['the_other'], 'yada3')
Beispiel #12
0
    def test_ini(self):
        """Testing ini config file(s)"""
        print "Running: %s - %s" % (self.id(), self.shortDescription())

        # write the config and verify the file landed on disk
        g.store_config(self.config, self.ini_file)
        self.assertTrue(os.path.exists(self.ini_file))

        # read the config file
        config = g.load_config(self.ini_file)
        g.show_config(config)
        self.assertEqual(config['defaults']['this'], 'yada1')
        self.assertEqual(config['defaults']['that'], 'yada2')
        # '%(this)s and %(that)s' must come back interpolated on read
        self.assertEqual(config['defaults']['this_and_that'],
                         'yada1 and yada2')
        self.assertEqual(config['globals']['the_other'], 'yada3')
Beispiel #13
0
 def test_stress_stderr(self):
     """Send load of text output to stderr.

     Floods stderr with ~1000 copies of a recursive /etc listing and
     verifies the run succeeds with empty stdout and non-empty stderr.
     """
     command = '''ls -Rail /etc > /tmp/railetc
         for i in $(seq 1 1000)
         do
             cat /tmp/railetc >&2
         done
         echo "Complete" >&2
         '''
     # Suppress INFO logging while stderr is flooded, then restore it.
     g.disable_log_levels('INFO')
     rcode, rout, rerr = g.run(self.primary_host, command)
     g.reset_log_levels()
     self.assertEqual(rcode, 0, 'stressing stderr failed')
     # Fixed typo in the failure message: 'sdtout' -> 'stdout'.
     self.assertEqual(rout, '', 'stdout has content.')
     self.assertNotEqual(rerr, '', 'stderr has no content.')
    def test_dynamic_provisioning_glusterfile_glusterpod_failure(self):
        """Create glusterblock PVC when gluster pod is down.

        Provisions a PVC, mounts it in a tiny app pod, starts background
        IO, kills the backing glusterfs pod, waits for its replacement,
        and verifies the IO finished without interruption.
        """
        # Check that we work with containerized Gluster
        if not self.is_containerized_gluster():
            self.skipTest("Only containerized Gluster clusters are supported.")

        mount_path = "/mnt"
        datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id())

        # Create secret and storage class
        self.create_storage_class()

        # Create PVC
        pvc_name = self.create_and_wait_for_pvc()

        # Create app POD with attached volume
        pod_name = oc_create_tiny_pod_with_volume(
            self.node, pvc_name, "test-pvc-mount-on-app-pod",
            mount_path=mount_path)
        self.addCleanup(
            wait_for_resource_absence, self.node, 'pod', pod_name)
        self.addCleanup(oc_delete, self.node, 'pod', pod_name)

        # Wait for app POD be up and running
        wait_for_pod_be_ready(
            self.node, pod_name, timeout=60, wait_step=2)

        # Run IO in background
        io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % (
            pod_name, datafile_path)
        async_io = g.run_async(self.node, io_cmd, "root")

        # Pick up one of the hosts which stores PV brick (4+ nodes case)
        gluster_pod_data = get_gluster_pod_names_by_pvc_name(
            self.node, pvc_name)[0]

        # Delete glusterfs POD from chosen host and wait for spawn of new one
        oc_delete(self.node, 'pod', gluster_pod_data["pod_name"])
        cmd = ("oc get pods -o wide | grep glusterfs | grep %s | "
               "grep -v Terminating | awk '{print $1}'") % (
                   gluster_pod_data["host_name"])
        # Poll until the replacement pod name shows up (simplified from
        # 'if not name: continue else: break'; also dropped the redundant
        # recomputation of the name after the loop).
        new_gluster_pod_name = ""
        for w in Waiter(600, 15):
            out = self.cmd_run(cmd)
            new_gluster_pod_name = out.strip().split("\n")[0].strip()
            if new_gluster_pod_name:
                break
        if w.expired:
            error_msg = "exceeded timeout, new gluster pod not created"
            g.log.error(error_msg)
            raise ExecutionError(error_msg)
        g.log.info("new gluster pod name is %s" % new_gluster_pod_name)
        wait_for_pod_be_ready(self.node, new_gluster_pod_name)

        # Check that async IO was not interrupted
        ret, out, err = async_io.async_communicate()
        self.assertEqual(ret, 0, "IO %s failed on %s" % (io_cmd, self.node))
 def test_return_code(self):
     """Testing the return code"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     # A successful command: exit 0, stdout has content, stderr is empty.
     rcode, rout, rerr = g.run(self.masternode, "cat /etc/fstab")
     self.assertEqual(rcode, 0)
     self.assertTrue(rout)
     self.assertFalse(rerr)
 def test_stderr(self):
     """Testing output to stderr"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     # Redirect uname to stderr: expect empty stdout, content on stderr.
     rcode, rout, rerr = g.run(self.masternode, "uname -a >&2")
     self.assertEqual(rcode, 0)
     self.assertFalse(rout)
     self.assertTrue(rerr)
    def setUpClass(cls):
        """Initialize all the variables necessary for test cases."""
        super(BaseClass, cls).setUpClass()

        # Initializes OCP config variables
        cls.ocp_servers_info = g.config['ocp_servers']
        cls.ocp_master_node = list(g.config['ocp_servers']['master'].keys())
        cls.ocp_master_node_info = g.config['ocp_servers']['master']
        cls.ocp_client = list(g.config['ocp_servers']['client'].keys())
        cls.ocp_client_info = g.config['ocp_servers']['client']
        cls.ocp_nodes = list(g.config['ocp_servers']['nodes'].keys())
        cls.ocp_nodes_info = g.config['ocp_servers']['nodes']

        # Initializes storage project config variables.
        # The "cns" section takes precedence; "openshift" is the fallback.
        openshift_config = g.config.get("cns", g.config.get("openshift"))
        cls.storage_project_name = openshift_config.get(
            'storage_project_name',
            openshift_config.get('setup', {}).get('cns_project_name'))

        # Initializes heketi config variables
        heketi_config = openshift_config['heketi_config']
        cls.heketi_dc_name = heketi_config['heketi_dc_name']
        cls.heketi_service_name = heketi_config['heketi_service_name']
        cls.heketi_client_node = heketi_config['heketi_client_node']
        cls.heketi_server_url = heketi_config['heketi_server_url']
        cls.heketi_cli_user = heketi_config['heketi_cli_user']
        cls.heketi_cli_key = heketi_config['heketi_cli_key']

        cls.gluster_servers = list(g.config['gluster_servers'].keys())
        cls.gluster_servers_info = g.config['gluster_servers']

        # 'storage_class1' takes precedence over the legacy
        # 'file_storage_class' key.
        cls.storage_classes = openshift_config['dynamic_provisioning'][
            'storage_classes']
        cls.sc = cls.storage_classes.get(
            'storage_class1', cls.storage_classes.get('file_storage_class'))
        # Base64-encode the heketi admin key on the master node; the result
        # is used as OpenShift secret data.
        cmd = "echo -n %s | base64" % cls.heketi_cli_key
        ret, out, err = g.run(cls.ocp_master_node[0], cmd, "root")
        if ret != 0:
            raise ExecutionError("failed to execute cmd %s on %s out: %s "
                                 "err: %s" % (
                                     cmd, cls.ocp_master_node[0], out, err))
        cls.secret_data_key = out.strip()

        # Checks if heketi server is alive
        if not hello_heketi(cls.heketi_client_node, cls.heketi_server_url):
            raise ConfigError("Heketi server %s is not alive"
                              % cls.heketi_server_url)

        # Switch to the storage project
        if not switch_oc_project(
                cls.ocp_master_node[0], cls.storage_project_name):
            raise ExecutionError("Failed to switch oc project on node %s"
                                 % cls.ocp_master_node[0])

        # One run id per test run so artifacts can be correlated later.
        if 'glustotest_run_id' not in g.config:
            g.config['glustotest_run_id'] = (
                datetime.datetime.now().strftime('%H_%M_%d_%m_%Y'))
        cls.glustotest_run_id = g.config['glustotest_run_id']
        msg = "Setupclass: %s : %s" % (cls.__name__, cls.glustotest_run_id)
        g.log.info(msg)
    def test_log_color_false(self):
        """With coloring disabled, colorfy returns the text unchanged."""
        g.config['log_color'] = False

        result = g.colorfy(g.RED, self.non_color_message)

        self.assertEqual(result, self.non_color_message,
                         "non color message does not match expectation")
Beispiel #19
0
 def test_run_local(self):
     """Testing SSH run_local() method"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     # echo -n avoids a trailing newline so rout matches exactly
     rcode, rout, rerr = g.run_local('echo -n %s' % self.test_string)
     self.assertEqual(rcode, 0)
     self.assertEqual(rout, self.test_string)
     print rout
     self.assertEqual(rerr, '')
    def test_log_color_true(self):
        """With coloring enabled, colorfy wraps the message per the flags."""
        g.config['log_color'] = True

        flags = g.RED | g.BG_YELLOW | g.BOLD
        result = g.colorfy(flags, self.non_color_message)

        self.assertEqual(result, self.color_message,
                         "color message does not match expectation")
 def test_stdout(self):
     """Testing output to stdout"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     # add a cleanup method to run after tearDown()
     self.addCleanup(self.cleanup_remote_commands)
     for node in g.config["nodes"]:
         rcode, rout, rerr = g.run(node, "ls -ld /etc")
     # NOTE(review): the assertions sit outside the loop, so only the
     # last node's result is checked -- confirm whether that is intended.
     self.assertEqual(rcode, 0)
     self.assertTrue(rout)
     self.assertFalse(rerr)
    def setUpClass(cls):
        """unittest standard setUpClass method
        Runs before all test_ methods in the class
        """
        print "Setting Up Class: %s" % cls.__name__

        # Load the template variables from the supporting config file
        # and merge them into g.config.
        cls.config_file = ('supporting_files/templates/'
                           'glusto_templates-vars.yml')
        config = g.load_config(cls.config_file)
        g.show_config(config)
        if config:
            g.update_config(config)

        # Inputs and output path for the template-rendering tests.
        cls.template_vars = g.config['templates']
        cls.template_file = ('templates/'
                             'glusto_templates-template.jinja')
        cls.search_path = 'supporting_files'
        cls.output_file = '/tmp/glusto_templates-output.yml'
def load_tests(loader, standard_tests, pattern):
    '''Load tests in a specific order.
    unittest standard feature requires Python2.7
    '''
    # TODO: make this configurable!!!
    # Run the basic command tests in a fixed, dependency-friendly order.
    ordered = ['test_return_code',
               'test_stdout',
               'test_stderr']
    return g.load_tests(TestGlustoBasicsPyTest, loader, ordered)
Beispiel #24
0
 def test_run_serial(self):
     """Testing SSH run_serial() method"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     # Run the echo on every host one at a time; results keyed by host.
     results = g.run_serial(self.hosts, 'echo -n %s' % self.test_string)
     for host, result in results.iteritems():
         self.assertIn(host, self.hosts)
         print host
         rcode, rout, rerr = result
         self.assertEqual(rcode, 0)
         self.assertEqual(rout, self.test_string)
         print rout
         self.assertEqual(rerr, '')
Beispiel #25
0
    def test_download(self):
        """Testing SSH download() method"""
        print "Running: %s - %s" % (self.id(), self.shortDescription())

        remote_file = '/etc/hosts'
        local_file = '/tmp/download_test_file'

        # remove local test file (ignore error if not exist)
        g.run_local('rm -f %s' % local_file)

        # md5sum remote file
        command = 'md5sum %s| awk \'{print $1}\'' % remote_file
        rcode,  rout, _ = g.run(self.primary_host, command)
        # NOTE(review): if this command fails, md5sum_up is never bound and
        # the final assertEqual raises NameError instead of a clean test
        # failure -- consider asserting rcode == 0 here.
        if rcode == 0:
            md5sum_up = rout.strip()

        # download it
        g.download(self.primary_host,
                   '/etc/hosts', '/tmp/download_test_file')

        # md5sum local copy
        command = 'md5sum %s | awk \'{print $1}\'' % local_file
        rcode, rout, _ = g.run_local(command)
        if rcode == 0:
            md5sum_down = rout.strip()

        # compare the md5sums
        self.assertEqual(md5sum_down, md5sum_up, 'md5sums do not match')
Beispiel #26
0
    def test_transfer(self):
        """Testing SSH transfer() method"""
        print "Running: %s - %s" % (self.id(), self.shortDescription())

        remote_file = '/etc/hosts'
        remote_file_copy = '/tmp/transfer_test_file'
        host1 = self.hosts[0]
        host2 = self.hosts[1]

        # remove remote test file copy(ignore error if not exist)
        g.run(host2, 'rm -f %s' % remote_file_copy)

        # md5sum remote file
        # NOTE(review): the checksum is taken on self.primary_host but the
        # transfer source is host1 -- presumably the same node; confirm,
        # or use host1 here for clarity.
        command = 'md5sum %s| awk \'{print $1}\'' % remote_file
        rcode,  rout, _ = g.run(self.primary_host, command)
        # NOTE(review): md5sum_orig stays unbound if this command fails,
        # turning the final assert into a NameError.
        if rcode == 0:
            md5sum_orig = rout.strip()

        # transfer it
        g.transfer(host1, remote_file, host2, remote_file_copy)

        # md5sum remote file copy
        command = 'md5sum %s | awk \'{print $1}\'' % remote_file_copy
        rcode, rout, _ = g.run(host2, command)
        if rcode == 0:
            md5sum_copy = rout.strip()

        # compare the md5sums
        self.assertEqual(md5sum_orig, md5sum_copy, 'md5sums do not match')
    def setUpClass(cls):
        """Base setUpClass: fill default fixture values and load config."""
        print "setUpClass BASE: %s" % cls.__name__

        # Subclasses may pre-set these; fall back to defaults otherwise.
        if not cls.camera:
            cls.camera = "nikon"
        if not cls.lens:
            cls.lens = "50mm"
        if not cls.filter:
            cls.filter = "none"

        cls.config = g.load_config('tests/glusto_tests_config.yml')
        print("CLASS (BASE) CONFIG:\n%s" % cls.config)
        print "SETUPCLASS GLUSTO (BASE): %s with %s with %s" % (cls.camera,
                                                                cls.lens,
                                                                cls.filter)
def run(target, command, user=None, log_level=None, orig_run=g.run):
    """Run a command on a host, or inside a pod via its host.

    Wraps glusto's run function.

    Args:
        target (str|Pod): 'auto_get_gluster_endpoint' selects a Gluster
            POD when any exist, otherwise the first configured Gluster
            server. Any other str is used directly as the endpoint.
            A Pod instance runs the command inside that POD.
        command (str|list): Command to run.
        user (str|None): user to be passed on to glusto's run method
        log_level (str|None): log level to be passed on to glusto's run method
        orig_run (function): The default implementation of the
            run method. Will be used when target is not a pod.

    Returns:
        A tuple of the command's return code, stdout, and stderr.
    """
    # NOTE: orig_run captures the glusto run method at function
    # definition time in order to capture the method before
    # any additional monkeypatching by other code

    if target == 'auto_get_gluster_endpoint':
        ocp_client = list(g.config['ocp_servers']['client'].keys())[0]
        pods = openshift_ops.get_ocp_gluster_pod_names(ocp_client)
        if pods:
            target = Pod(ocp_client, pods[0])
        else:
            target = list(g.config.get("gluster_servers", {}).keys())[0]

    if not isinstance(target, Pod):
        return orig_run(target, command, user=user, log_level=log_level)

    # Rewrite the command to execute inside the POD via 'oc rsh'.
    rsh_prefix = ['oc', 'rsh', target.podname]
    if isinstance(command, six.string_types):
        full_cmd = ' '.join(rsh_prefix + [command])
    else:
        full_cmd = rsh_prefix + command

    # Returning g.run's tuple directly keeps the return value exactly
    # as documented above.
    return g.run(target.node, full_cmd, user=user, log_level=log_level)
Beispiel #29
0
 def test_run_parallel(self):
     """Testing SSH run_parallel() method"""
     print "Running: %s - %s" % (self.id(), self.shortDescription())
     # Fan the echo out to all hosts at once; results keyed by host.
     results = g.run_parallel(self.hosts, 'echo -n %s' % self.test_string)
     hosts_already_tested = []
     for host, result in results.iteritems():
         # test host is in list of hosts to test
         self.assertIn(host, self.hosts)
         # test host has not already been tested (no duplicate results)
         self.assertNotIn(host, hosts_already_tested)
         hosts_already_tested.append(host)
         print host
         rcode, rout, rerr = result
         self.assertEqual(rcode, 0)
         self.assertEqual(rout, self.test_string)
         print rout
         self.assertEqual(rerr, '')
    def _node_reboot(self):
        """Reboot the first gluster server and wait for full recovery."""
        storage_hostname = (g.config["gluster_servers"]
                            [self.gluster_servers[0]]["storage"])

        # Delay the shutdown slightly so the command can return first.
        cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
        ret, out, err = g.run(storage_hostname, cmd)

        self.addCleanup(self._wait_for_gluster_pod_to_be_ready)

        # 255 is the expected status here: the SSH session is severed by
        # the reboot rather than the command exiting normally.
        if ret != 255:
            err_msg = "failed to reboot host %s error: %s" % (
                storage_hostname, err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        try:
            g.ssh_close_connection(storage_hostname)
        except Exception as e:
            g.log.error("failed to close connection with host %s"
                        " with error: %s" % (storage_hostname, e))
            raise

        # added sleep as node will restart after 3 sec
        time.sleep(3)

        # Poll until an rpyc connection can be established again.
        # NOTE(review): user="******" looks like a scrubbed credential --
        # restore the real username before running.
        for w in Waiter(timeout=600, interval=10):
            try:
                if g.rpyc_get_connection(storage_hostname, user="******"):
                    g.rpyc_close_connection(storage_hostname, user="******")
                    break
            except Exception as err:
                g.log.info("exception while getting connection: '%s'" % err)

        if w.expired:
            error_msg = ("exceeded timeout 600 sec, node '%s' is "
                         "not reachable" % storage_hostname)
            g.log.error(error_msg)
            raise ExecutionError(error_msg)

        # wait for the gluster pod to be in 'Running' state
        self._wait_for_gluster_pod_to_be_ready()

        # glusterd and gluster-blockd service should be up and running
        service_names = ("glusterd", "gluster-blockd", "tcmu-runner")
        for gluster_pod in self.gluster_pod_list:
            for service in service_names:
                g.log.info("gluster_pod - '%s' : gluster_service '%s'" % (
                    gluster_pod, service))
                check_service_status_on_pod(
                    self.oc_node, gluster_pod, service, "running"
                )
Beispiel #31
0
    def test_volume_status_inode_while_io_in_progress(self):
        '''
        Create any type of volume then mount the volume, once
        volume mounted successfully on client, start running IOs on
        mount point then run the "gluster volume status volname inode"
        command on all clusters randomly.
            "gluster volume status volname inode" command should not get
        hang while IOs in progress.
        Then check that IOs completed successfully or not on mount point.
        Check that files in mount point listing properly or not.
        '''

        # Mounting a volume
        ret = self.mount_volume(self.mounts)
        self.assertTrue(ret, "Volume mount failed for %s" % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        # After Mounting immediately writing IO's are failing some times,
        # that's why keeping sleep for 10 secs
        sleep(10)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Each mount gets a distinct --dirname-start-num via
            # self.counter so the workloads do not collide.
            cmd = (
                "/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 2 "
                "--dir-length 15 "
                "--max-num-of-dirs 5 "
                "--num-of-files 25 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10
        self.io_validation_complete = False

        # performing  "gluster volume status volname inode" command on
        # all cluster servers randomly while io is in progress,
        # this command should not get hang while io is in progress
        # pylint: disable=unused-variable
        for i in range(20):
            ret, _, _ = g.run(
                random.choice(self.servers),
                "gluster --timeout=12000 volume status %s "
                "inode" % self.volname)
            self.assertEqual(ret, 0, ("Volume status 'inode' failed on "
                                      "volume %s" % self.volname))
            g.log.info(
                "Successful in logging volume status"
                "'inode' of volume %s", self.volname)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Beispiel #32
0
    def test_rebalance_with_brick_down(self):
        """
        Rebalance with brick down in replica
        - Create a Replica volume.
        - Bring down one of the brick down in the replica pair
        - Do some IO and create files on the mount point
        - Add a pair of bricks to the volume
        - Initiate rebalance
        - Bring back the brick which was down.
        - After self heal happens, all the files should be present.
        """
        # Log the volume info and status before brick is down.
        log_volume_info_and_status(self.mnode, self.volname)

        # Bring one of the bricks offline and verify it actually went down
        # (the return value was previously ignored).
        brick_list = get_all_bricks(self.mnode, self.volname)
        ret = bring_bricks_offline(self.volname, choice(brick_list))
        self.assertTrue(ret, "Failed to bring a brick offline on volume %s"
                        % self.volname)

        # Log the volume info and status after brick is down.
        log_volume_info_and_status(self.mnode, self.volname)

        # Create files at mountpoint.
        cmd = (
            "/usr/bin/env python %s create_files "
            "-f 2000 --fixed-file-size 1k --base-file-name file %s"
            % (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(
            self.mounts[0].client_system, cmd, user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete.
        self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs,
                                                self.mounts[0]),
                        "IO failed on some of the clients")
        g.log.info("IO completed on the clients")

        # Compute the arequal checksum before bringing all bricks online
        arequal_before_all_bricks_online = collect_mounts_arequal(self.mounts)

        # Log the volume info and status before expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Expand the volume.
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Log the volume info after expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Start Rebalance.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Log the volume info and status before bringing all bricks online
        log_volume_info_and_status(self.mnode, self.volname)

        # Bring all bricks online with a forced volume start.
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Not able to start volume with force option")
        g.log.info("Volume start with force option successful.")

        # Log the volume info and status after bringing all bricks online
        log_volume_info_and_status(self.mnode, self.volname)

        # Monitor heal completion.
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "heal has not yet completed")
        g.log.info("Self heal completed")

        # Compute the arequal checksum after all bricks online.
        arequal_after_all_bricks_online = collect_mounts_arequal(self.mounts)

        # Comparing arequal checksum before and after the operations.
        self.assertEqual(arequal_before_all_bricks_online,
                         arequal_after_all_bricks_online,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")
Beispiel #33
0
def share_volume_over_smb(mnode, volname, smb_users_info):
    """Share the given gluster volume over SMB.

    Sets the volume options needed for SMB/CIFS access, verifies the share
    is visible via smbclient and enables mounting over SMB for the given
    users.

    Args:
        mnode (str): Node on which commands has to be executed.
        volname (str): Name of the volume to be shared.
        smb_users_info (dict): Dict containing users info. Example:
            smb_users_info = {
                'root': {'password': '******',
                         'acl': ''
                         },
                'user1': {'password': '******',
                          'acl': ''
                          },
                'user2': {'password': '******',
                          'acl': ''
                          }
                }

    Returns:
        bool : True on successfully sharing the volume over SMB.
            False otherwise
    """
    g.log.info("Start sharing the volume over SMB")

    # Volume options required for SMB/CIFS access:
    # - 'storage.batch-fsync-delay-usec 0' is needed so that ping_pong's
    #   lock and I/O coherency tests work on CIFS.
    smb_volume_options = (
        ("stat-prefetch", "on"),
        ("server.allow-insecure", "on"),
        ("storage.batch-fsync-delay-usec", "0"),
    )
    for option, value in smb_volume_options:
        cmd = "gluster volume set %s %s %s" % (volname, option, value)
        ret, _, _ = g.run(mnode, cmd)
        if ret != 0:
            g.log.error("Failed to set the volume option '%s' to '%s' on %s",
                        option, value, volname)
            return False
        g.log.info("Successfully set '%s' to '%s' on %s", option, value,
                   volname)

    # Verify if the volume can be accessed from the SMB/CIFS share.
    cmd = ("smbclient -L localhost -U | grep -i -Fw gluster-%s " % volname)
    ret, _, _ = g.run(mnode, cmd)
    if ret != 0:
        g.log.error("volume '%s' not accessible via SMB/CIFS share", volname)
        return False
    g.log.info("volume '%s' can be accessed from SMB/CIFS share", volname)

    # To verify if the SMB/CIFS share can be accessed by the root/non-root user
    # TBD

    # Enable mounting volumes over SMB
    ret = enable_mounting_volume_over_smb(mnode, volname, smb_users_info)
    if not ret:
        g.log.error("Failed to enable mounting volumes using SMB")
        return False
    g.log.info(
        "Successfully enabled mounting volumes using SMB for the "
        "smbusers: %s", str(smb_users_info.keys()))

    # Verify if volume is shared
    ret = is_volume_exported(mnode, volname, "smb")
    if not ret:
        g.log.info("Volume %s is not exported as 'cifs/smb' share", volname)
        return False
    g.log.info("Volume %s is exported as 'cifs/smb' share", volname)

    return True
    def test_subdir_with_quota_limit(self):
        # pylint: disable=too-many-statements
        """
        Mount the volume
        Create 2 subdir on mount point
        dir1-> /level1/subdir1 dir2->/dlevel1/dlevel2/dlevel3/subdir2
        Auth allow - Client1(/level1/subdir1),
        Client2(/dlevel1/dlevel2/dlevel3/subdir2)
        Mount the subdir1 on client 1 and subdir2 on client2
        Enable Quota
        Verify Quota is enabled on volume
        Set quota limit as 1GB and 2GB on both subdirs respectively
        Perform a quota list operation
        Perform IO's on both subdir until quota limit is almost hit for subdir1
        Again Perform a quota list operation
        Run IO's on Client 1.This should fail
        Run IO's on Client2.This should pass
        """
        # Create deep subdirectories subdir1 and subdir2 on mount point
        ret = mkdir(self.mounts[0].client_system,
                    "%s/level1/subdir1" % self.mounts[0].mountpoint,
                    parents=True)
        self.assertTrue(
            ret, ("Failed to create directory '/level1/subdir1' on"
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        ret = mkdir(self.mounts[0].client_system,
                    "%s/dlevel1/dlevel2/dlevel3/subdir2" %
                    self.mounts[0].mountpoint,
                    parents=True)
        self.assertTrue(
            ret, ("Failed to create directory "
                  "'/dlevel1/dlevel2/dlevel3/subdir2' on"
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))

        # Unmount the parent volume; it will be re-mounted per subdir below.
        ret = self.unmount_volume(self.mounts)
        self.assertTrue(ret, "Volumes Unmount failed")
        g.log.info("Volumes Unmounted successfully")

        # Restrict each subdirectory to a single client via auth.allow.
        g.log.info(
            'Setting authentication on directories subdir1 and subdir2'
            'for client %s and %s', self.clients[0], self.clients[1])
        ret = set_auth_allow(
            self.volname, self.mnode, {
                '/level1/subdir1': [self.clients[0]],
                '/dlevel1/dlevel2/dlevel3/subdir2': [self.clients[1]]
            })
        # BUG FIX: the failure message formatted 'self.volume' (the volume
        # config object) instead of the volume name used everywhere else.
        self.assertTrue(
            ret, 'Failed to set Authentication on volume %s' % self.volname)

        # Creating mount list for subdirectories
        self.subdir_mounts = [
            copy.deepcopy(self.mounts[0]),
            copy.deepcopy(self.mounts[1])
        ]
        self.subdir_mounts[0].volname = "%s/level1/subdir1" % self.volname
        self.subdir_mounts[1].volname = ("%s/dlevel1/dlevel2/dlevel3/subdir2" %
                                         self.volname)

        # Mount Subdirectory "subdir1" on client 1 and "subdir2" on client 2
        for mount_obj in self.subdir_mounts:
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Failed to mount  %s on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Successfully mounted %s on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully mounted subdirectories on client1"
                   "and clients 2")

        # Enable quota on volume
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to enable quota on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Check if quota is enabled
        g.log.info("Validate Quota is enabled on the volume %s", self.volname)
        ret = is_quota_enabled(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Quota is not enabled on the volume %s", self.volname))
        g.log.info("Successfully Validated quota is enabled on volume %s",
                   self.volname)

        # Paths (relative to the volume root) on which quota limits are set.
        path1 = "/level1/subdir1"
        path2 = "/dlevel1/dlevel2/dlevel3/subdir2"

        def _verify_quota_list():
            """Fetch the quota list and assert both subdir paths are in it."""
            g.log.info("Get Quota list on the volume %s", self.volname)
            quota_list = quota_fetch_list(self.mnode, self.volname)
            self.assertIsNotNone(quota_list, ("Failed to get the quota list "
                                              "of the volume %s",
                                              self.volname))
            for path in (path1, path2):
                self.assertIn(
                    path, quota_list.keys(),
                    ("%s not part of the quota list %s even if "
                     "it is set on the volume %s", path, quota_list,
                     self.volname))
            g.log.info("Successfully listed quota list %s of the "
                       "volume %s", quota_list, self.volname)

        # Set quota limits: 1GB on subdir1 and 2GB on subdir2.
        for path, limit in ((path1, "1GB"), (path2, "2GB")):
            g.log.info("Set Quota Limit on the path %s of the volume %s",
                       path, self.volname)
            ret, _, _ = quota_limit_usage(self.mnode,
                                          self.volname,
                                          path,
                                          limit=limit)
            self.assertEqual(ret, 0, ("Failed to set quota limit on path %s "
                                      "of the volume %s", path, self.volname))
            g.log.info(
                "Successfully set the Quota limit on %s of the volume "
                "%s", path, self.volname)

        # Get Quota List on the volume and verify both limits are listed.
        _verify_quota_list()

        # Create near to 1GB of data on both subdir mounts
        for mount_object in self.subdir_mounts:
            g.log.info("Creating Files on %s:%s", mount_object.client_system,
                       mount_object.mountpoint)
            cmd = ("cd %s ; for i in `seq 1 1023` ;"
                   "do dd if=/dev/urandom of=file$i bs=1M "
                   "count=1;done" % (mount_object.mountpoint))
            ret, _, _ = g.run(mount_object.client_system, cmd)
            self.assertEqual(ret, 0, "Failed to create files on mountpoint")
            g.log.info("Files created successfully on mountpoint")

        # Again verify the quota list after the data creation.
        _verify_quota_list()

        # Again run IO's to check if quota limit is adhere for subdir1

        # Start IO's on subdir1: the 1GB limit is already consumed, so the
        # writes are expected to fail.
        g.log.info("Creating Files on %s:%s", self.clients[0],
                   self.subdir_mounts[0].mountpoint)
        cmd = ("cd %s ; for i in `seq 1024 1500` ;"
               "do dd if=/dev/urandom of=file$i bs=1M "
               "count=1;done" % (self.subdir_mounts[0].mountpoint))
        ret, _, _ = g.run(self.clients[0], cmd)
        if ret == 0:
            raise ExecutionError("IO was expected to Fail."
                                 "But it got passed")
        else:
            g.log.info(
                "IO's failed as expected on %s:%s as quota "
                "limit reached already", self.clients[0],
                self.subdir_mounts[0].mountpoint)

        # Start IO's on subdir2: the 2GB limit still has headroom, so the
        # writes are expected to succeed.
        g.log.info("Creating Files on %s:%s", self.clients[1],
                   self.subdir_mounts[1].mountpoint)
        cmd = ("cd %s ; for i in `seq 1024 1500` ;"
               "do dd if=/dev/urandom of=file$i bs=1M "
               "count=1;done" % (self.subdir_mounts[1].mountpoint))
        ret, _, _ = g.run(self.clients[1], cmd)
        self.assertEqual(ret, 0,
                         ("Failed to create files on %s" % self.clients[1]))
        g.log.info("Files created successfully on %s:%s", self.clients[1],
                   self.subdir_mounts[1].mountpoint)
Beispiel #35
0
    def test_afr_reset_brick_heal_full(self):
        """
         1. Create files/dirs from mount point
         2. With IO in progress execute reset-brick start
         3. Now format the disk from back-end, using rm -rf <brick path>
         4. Execute reset brick commit and check for the brick is online.
         5. Issue volume heal using "gluster vol heal <volname> full"
         6. Check arequal for all bricks to verify all backend bricks
            including the resetted brick have same data
        """
        # Kick off deep-directory IO on every mount in the background;
        # it is validated after the heal below.
        self.all_mounts_procs = []
        for count, mount_obj in enumerate(self.mounts):
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 3 --dir-length 5 "
                   "--max-num-of-dirs 5 --num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Pick a random brick of the volume to reset.
        all_bricks = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume")
        brick_to_reset = choice(all_bricks)

        # Start reset brick
        ret, _, err = reset_brick(self.mnode,
                                  self.volname,
                                  src_brick=brick_to_reset,
                                  option="start")
        self.assertEqual(ret, 0, err)
        g.log.info("Reset brick: %s started", brick_to_reset)

        # Validate the brick is offline
        ret = are_bricks_offline(self.mnode, self.volname, [brick_to_reset])
        self.assertTrue(ret, "Brick:{} is still online".format(brick_to_reset))

        # Simulate formatting the disk: rm -rf of the brick directory
        node, brick_path = brick_to_reset.split(":")
        ret = rmdir(node, brick_path, force=True)
        self.assertTrue(
            ret, "Unable to delete the brick {} on "
            "node {}".format(brick_path, node))

        # Reset brick commit
        ret, _, err = reset_brick(self.mnode,
                                  self.volname,
                                  src_brick=brick_to_reset,
                                  option="commit")
        self.assertEqual(ret, 0, err)
        g.log.info("Reset brick committed successfully")

        # Check the brick is online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(
            ret, "Few volume processess are offline for the "
            "volume: {}".format(self.volname))

        # Trigger full heal
        ret = trigger_heal_full(self.mnode, self.volname)
        self.assertTrue(ret, "Unable to trigger the heal full command")

        # Wait for the heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "Heal didn't complete in 20 mins time")

        # Validate io on the clients
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on the mounts")
        self.all_mounts_procs = []

        # Check arequal of the back-end bricks after heal completion:
        # all bricks of a subvol must report an identical checksum.
        all_subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        for subvol in all_subvols:
            ret, arequal_from_subvol = collect_bricks_arequal(subvol)
            # BUG FIX: the original failure messages described success,
            # which was misleading whenever the assertions actually fired.
            self.assertTrue(
                ret, "Failed to collect arequal across the"
                " bricks in the subvol {}".format(subvol))
            self.assertEqual(
                len(set(arequal_from_subvol)), 1, "Arequal is "
                "not same on all the bricks in the subvol {}".format(subvol))
    def test_self_heal_and_add_brick_with_data_from_diff_users(self):
        """
        Test case:
        1. Created a 2X3 volume.
        2. Mount the volume using FUSE and give 777 permissions to the mount.
        3. Added a new user.
        4. Login as new user and created 100 files from the new user:
           for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done
        5. Kill a brick which is part of the volume.
        6. On the mount, login as root user and create 1000 files:
           for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done
        7. On the mount, login as new user, and copy existing data to
           the mount.
        8. Start volume using force.
        9. While heal is in progress, add-brick and start rebalance.
        10. Wait for rebalance and heal to complete,
        11. Check for MSGID: 108008 errors in rebalance logs.
        """
        # Change permissions of mount point to 777 so non-root users
        # can write to it.
        ret = set_file_permissions(self.first_client, self.mountpoint,
                                   '-R 777')
        self.assertTrue(ret, "Unable to change mount point permissions")
        g.log.info("Mount point permissions set to 777")

        # Create 100 files from non-root user
        cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom "
               "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0],
                                                            self.mountpoint))
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create files from non-root user")

        # Kill one brick which is part of the volume
        self._bring_bricks_offline()

        # Create 1000 files from root user
        cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i"
               " bs=10M count=1;done" % self.mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        # BUG FIX: the assertion message had a typo ('creare').
        self.assertFalse(ret, "Failed to create files from root user")

        # On the mount, login as the second non-root user, and copy
        # existing data (an unpacked kernel tarball) to the mount.
        cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/"
               "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;"
               "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint))
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to copy files from non-root user")

        # Check if there are files to be healed
        self._check_if_there_are_files_to_be_healed()

        # Start the vol using force
        self._restart_volume_and_bring_all_offline_bricks_online()

        # Add bricks to volume and wait for heal to complete
        self._expand_volume_and_wait_for_rebalance_to_complete()

        # Wait for heal to complete
        self._wait_for_heal_to_completed()

        # Check for MSGID: 108008 (Input/output error) in the rebalance
        # log of every node that hosts a brick of the volume.
        participating_nodes = []
        for brick in get_all_bricks(self.mnode, self.volname):
            node, _ = brick.split(':')
            participating_nodes.append(node)

        for server in participating_nodes:
            ret = occurences_of_pattern_in_file(
                server, "MSGID: 108008",
                "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
            self.assertEqual(ret, 0,
                             "[Input/output error] present in rebalance log"
                             " file")
        g.log.info("Expanding volume successful and no MSGID: 108008 "
                   "errors seen in rebalance logs")
Beispiel #37
0
    def test_shd_should_not_crash_executed_heal_info(self):
        """
        - set "entry-self-heal", "metadata-self-heal", "data-self-heal" to off
        - write a few files
        - bring down brick0
        - add IO
        - do a heal info and check for files pending heal on last 2 bricks
        - set "performance.enable-least-priority" to "enable"
        - bring down brick1
        - set the "quorum-type" to "fixed"
        - add IO
        - do a heal info and check for files pending heal on the last brick
        """
        # pylint: disable=too-many-statements
        # Bricks in a fixed order; "brick0"/"brick1" below refer to
        # bricks_list[0] and bricks_list[1].
        bricks_list = get_all_bricks(self.mnode, self.volname)
        # Setting options: disable client-side self-heal so that pending
        # heals accumulate and can be counted via heal info.
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off"
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Creating files on client side (baseline data, all bricks up)
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create files
            g.log.info('Creating files...')
            command = ("/usr/bin/env python %s create_files -f 10 "
                       "--fixed-file-size 1M %s" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Bring brick0 offline
        g.log.info('Bringing bricks %s offline', bricks_list[0])
        ret = bring_bricks_offline(self.volname, bricks_list[0])
        self.assertTrue(ret,
                        'Failed to bring bricks %s offline' % bricks_list[0])

        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]])
        self.assertTrue(ret, 'Bricks %s are not offline' % bricks_list[0])
        g.log.info('Bringing bricks %s offline is successful', bricks_list[0])

        # Creating files on client side while brick0 is down; these files
        # become pending heals against brick0.
        number_of_files_one_brick_off = '1000'
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create files
            g.log.info('Creating files...')
            command = ("/usr/bin/env python %s create_files "
                       "-f %s "
                       "--fixed-file-size 1k "
                       "--base-file-name new_file "
                       "%s" %
                       (self.script_upload_path, number_of_files_one_brick_off,
                        mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Get heal info
        g.log.info("Getting heal info...")
        heal_info_data = get_heal_info_summary(self.mnode, self.volname)
        self.assertIsNotNone(heal_info_data, 'Failed to get heal info.')
        g.log.info('Success in getting heal info')

        # Check quantity of file pending heal on each still-online brick.
        # NOTE(review): expected count is the 1000 new files + 1 — the extra
        # entry is presumably the parent directory pending an entry heal;
        # confirm against heal-info output.
        for brick in bricks_list[1:]:
            self.assertEqual(heal_info_data[brick]['numberOfEntries'],
                             str(int(number_of_files_one_brick_off) + 1),
                             'Number of files pending heal is not correct')

        # Setting options
        g.log.info('Setting options...')
        options = {"performance.enable-least-priority": "enable"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Bring brick1 offline (two bricks down from here on)
        g.log.info('Bringing bricks %s offline', bricks_list[1])
        ret = bring_bricks_offline(self.volname, bricks_list[1])
        self.assertTrue(ret,
                        'Failed to bring bricks %s offline' % bricks_list[1])

        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]])
        self.assertTrue(ret, 'Bricks %s are not offline' % bricks_list[1])
        g.log.info('Bringing bricks %s offline is successful', bricks_list[1])

        # Setting options: quorum-type "fixed" — presumably so writes are
        # still allowed with two bricks down; confirm quorum-count setting
        # in the volume config.
        g.log.info('Setting options...')
        options = {"quorum-type": "fixed"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Creating files on client side while both brick0 and brick1 are down
        number_of_files_two_brick_off = '100'
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create files
            g.log.info('Creating files...')
            command = ("/usr/bin/env python %s create_files "
                       "-f %s "
                       "--fixed-file-size 1k "
                       "--base-file-name new_new_file "
                       "%s" %
                       (self.script_upload_path, number_of_files_two_brick_off,
                        mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Get heal info
        g.log.info("Getting heal info...")
        heal_info_data = get_heal_info_summary(self.mnode, self.volname)
        self.assertIsNotNone(heal_info_data, 'Failed to get heal info.')
        g.log.info('Success in getting heal info')

        # Check quantity of file pending heal on the last (only online)
        # brick: files from both offline phases plus the same +1 entry.
        number_of_files_to_check = str(
            int(number_of_files_one_brick_off) +
            int(number_of_files_two_brick_off) + 1)
        self.assertEqual(heal_info_data[bricks_list[-1]]['numberOfEntries'],
                         number_of_files_to_check,
                         'Number of files pending heal is not correct')
    def test_self_heal_differing_in_file_type(self):
        """
        testing self heal of files with different file types
        with default configuration

        Description:
        - create IO
        - calculate arequal
        - bring down all bricks processes from selected set
        - calculate arequal and compare with arequal before
        getting bricks offline
        - modify the data
        - arequal before getting bricks online
        - bring bricks online
        - check daemons and healing completion
        - start healing
        - calculate arequal and compare with arequal before bringing bricks
        online and after bringing bricks online
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Creating files on client side
        all_mounts_procs = []
        test_file_type_differs_self_heal_folder = \
            'test_file_type_differs_self_heal'
        g.log.info("Generating data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)

        # Creating files
        command = ("cd %s/ ; "
                   "mkdir %s ;"
                   "cd %s/ ;"
                   "for i in `seq 1 10` ; "
                   "do mkdir l1_dir.$i ; "
                   "for j in `seq 1 5` ; "
                   "do mkdir l1_dir.$i/l2_dir.$j ; "
                   "for k in `seq 1 10` ; "
                   "do dd if=/dev/urandom of=l1_dir.$i/l2_dir.$j/test.$k "
                   "bs=1k count=$k ; "
                   "done ; "
                   "done ; "
                   "done ; " % (self.mounts[0].mountpoint,
                                test_file_type_differs_self_heal_folder,
                                test_file_type_differs_self_heal_folder))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # wait for io to complete
        self.assertTrue(wait_for_io_to_complete(all_mounts_procs, self.mounts),
                        "Io failed to complete on some of the clients")

        # Get arequal before getting bricks offline
        g.log.info('Getting arequal before getting bricks offline...')
        ret, result_before_offline = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks offline '
                   'is successful')

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(
            filter(None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                          bricks_to_bring_offline_dict['cold_tier_bricks'] +
                          bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Get arequal after getting bricks offline
        g.log.info('Getting arequal after getting bricks offline...')
        ret, result_after_offline = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks offline '
                   'is successful')

        # Checking arequals before bringing bricks offline
        # and after bringing bricks offline
        self.assertItemsEqual(
            result_before_offline, result_after_offline,
            'Checksums before and after '
            'bringing bricks offline are not equal')
        g.log.info('Checksums before and after '
                   'bringing bricks offline are equal')

        # Modify the data
        all_mounts_procs = []
        g.log.info("Modifying data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        command = ("cd %s/%s/ ; "
                   "for i in `seq 1 10` ; "
                   "do for j in `seq 1 5` ; "
                   "do for k in `seq 1 10` ; "
                   "do rm -f l1_dir.$i/l2_dir.$j/test.$k ; "
                   "mkdir l1_dir.$i/l2_dir.$j/test.$k ; "
                   "done ; "
                   "done ; "
                   "done ;" % (self.mounts[0].mountpoint,
                               test_file_type_differs_self_heal_folder))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Get arequal before getting bricks online
        g.log.info('Getting arequal before getting bricks online...')
        ret, result_before_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks online '
                   'is successful')

        # Bring brick online
        g.log.info('Bringing bricks %s online', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after getting bricks online
        g.log.info('Getting arequal after getting bricks online...')
        ret, result_after_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks online '
                   'is successful')

        # Checking arequals before bringing bricks online
        # and after bringing bricks online
        self.assertItemsEqual(
            result_before_online, result_after_online, 'Checksums before and '
            'after bringing bricks online are not equal')
        g.log.info('Checksums before and after bringing bricks online '
                   'are equal')
Beispiel #39
0
 def _create_soft_links_to_directories(self):
     """Symlink each dir.$i under test_link_self_heal to sym_link_dir.$i."""
     link_cmd = (
         "cd {}/test_link_self_heal; for i in `seq 1 5`; do ln -s "
         "dir.$i sym_link_dir.$i; done".format(self.mountpoint)
     )
     # g.run returns (retcode, stdout, stderr); a zero retcode means success.
     retcode = g.run(self.first_client, link_cmd)[0]
     self.assertFalse(retcode, "Failed to create soft links to dirs")
    def test_auth_reject_allow(self):
        """
        Verify auth.reject and auth.allow volume options in volume level using
        both client ip and hostname.
        Verify auth.reject and auth.allow volume options in sub-directory
        level using both client ip and hostname.
        Steps:
        1. Create and start volume.
        2. Set auth.reject on volume for client1 using ip of client1.
        3. Set auth.allow on volume for client2 using ip of client2.
        4. Try to mount volume on client1. This should fail.
        5. Check the client1 log for AUTH_FAILED event.
        6. Mount volume on client2.
        7. Unmount the volume from client2.
        8. Set auth.reject on volume for client1 using hostname of client1.
        9. Set auth.allow on volume for client2 using hostname of client2.
        10. Repeat steps 4 to 6
        11. Create directory d1 on client2 mountpoint.
        12. Unmount the volume from client2.
        13. Set auth.reject on d1 for client1 using ip of client1.
        14. Set auth.allow on d1 for client2 using ip of client2.
        15. Try to mount d1 on client1. This should fail.
        16. Check the client1 log for AUTH_FAILED event.
        17. Mount d1 on client2.
        18. Unmount d1 from client2.
        19. Set auth.reject on d1 for client1 using hostname of client1.
        20. Set auth.allow on d1 for client2 using hostname of client2.
        21. Repeat steps 15 to 18.
        """
        # pylint: disable = too-many-statements
        # Setting auth.reject on volume for client1 using ip
        auth_dict = {'all': [self.mounts[0].client_system]}
        ret = set_auth_reject(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set auth.reject volume option.")
        g.log.info("Successfully set auth.reject option on volume")

        # Setting auth.allow on volume for client2 using ip
        auth_dict = {'all': [self.mounts[1].client_system]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set auth.allow volume option")
        g.log.info("Successfully set auth.allow option on volume")

        # Trying to mount volume on client1
        self.unauthenticated_mount(self.mounts[0])

        # Verify whether mount failure on client1 is due to auth error
        log_msg = self.is_auth_failure(self.mounts[0].client_system)
        # Remember the matched AUTH_FAILED log line so later checks can
        # insist on a *new* occurrence rather than re-matching this one.
        prev_log_statement = log_msg

        # Mounting volume on client2
        self.authenticated_mount(self.mounts[1])

        g.log.info("Verification of auth.reject and auth.allow options on "
                   "volume using client IP is successful")

        # Unmount volume from client2
        ret = self.mounts[1].unmount()
        self.assertTrue(ret, ("Failed to unmount volume %s from client %s" %
                              (self.volname, self.mounts[1].client_system)))

        # Obtain hostname of client1
        ret, hostname_client1, _ = g.run(self.mounts[0].client_system,
                                         "hostname")
        self.assertEqual(ret, 0, ("Failed to obtain hostname of client %s" %
                                  self.mounts[0].client_system))
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.mounts[0].client_system, hostname_client1.strip())

        # Obtain hostname of client2
        ret, hostname_client2, _ = g.run(self.mounts[1].client_system,
                                         "hostname")
        self.assertEqual(ret, 0, ("Failed to obtain hostname of client %s" %
                                  self.mounts[1].client_system))
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.mounts[1].client_system, hostname_client2.strip())

        # Setting auth.reject on volume for client1 using hostname
        auth_dict = {'all': [hostname_client1.strip()]}
        ret = set_auth_reject(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set auth.reject volume option.")
        g.log.info("Successfully set auth.reject option on volume")

        # Setting auth.allow on volume for client2 using hostname
        auth_dict = {'all': [hostname_client2.strip()]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set auth.allow volume option")
        g.log.info("Successfully set auth.allow option on volume")

        # Trying to mount volume on client1
        self.unauthenticated_mount(self.mounts[0])

        # Verify whether mount failure on client1 is due to auth error
        log_msg = self.is_auth_failure(self.mounts[0].client_system,
                                       prev_log_statement)
        prev_log_statement = log_msg

        # Mounting volume on client2
        self.authenticated_mount(self.mounts[1])

        g.log.info("Verification of auth.reject and auth.allow options on "
                   "volume using client hostname is successful")

        # Creating sub directory d1 on mounted volume
        ret = mkdir(self.mounts[1].client_system,
                    "%s/d1" % self.mounts[1].mountpoint)
        self.assertTrue(ret, ("Failed to create directory 'd1' in volume %s "
                              "from client %s" %
                              (self.volname, self.mounts[1].client_system)))

        # Unmount volume from client2
        ret = self.mounts[1].unmount()
        self.assertTrue(ret, ("Failed to unmount volume %s from client %s" %
                              (self.volname, self.mounts[1].client_system)))

        # Setting auth.reject on d1 for client1 using ip
        auth_dict = {'/d1': [self.mounts[0].client_system]}
        ret = set_auth_reject(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set auth.reject volume option.")
        g.log.info("Successfully set auth.reject option.")

        # Setting auth.allow on d1 for client2 using ip
        auth_dict = {'/d1': [self.mounts[1].client_system]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set auth.allow volume option")
        g.log.info("Successfully set auth.allow option.")

        # Creating mount object for sub-directory mount on client1.
        # deepcopy so mutating volname below does not alter self.mounts[0].
        mount_obj_client1 = copy.deepcopy(self.mounts[0])
        mount_obj_client1.volname = "%s/d1" % self.volname

        # Creating mount object for sub-directory mount on client2
        mount_obj_client2 = copy.deepcopy(self.mounts[1])
        mount_obj_client2.volname = "%s/d1" % self.volname

        # Trying to mount d1 on client1
        self.unauthenticated_mount(mount_obj_client1)

        # Verify whether mount failure on client1 is due to auth error
        log_msg = self.is_auth_failure(mount_obj_client1.client_system,
                                       prev_log_statement)
        prev_log_statement = log_msg

        # Mounting d1 on client2
        self.authenticated_mount(mount_obj_client2)

        g.log.info("Verification of auth.reject and auth.allow options on "
                   "sub-directory level using client IP is successful")

        # Unmount d1 from client2
        ret = mount_obj_client2.unmount()
        self.assertTrue(
            ret,
            ("Failed to unmount %s from client %s" %
             (mount_obj_client2.volname, mount_obj_client2.client_system)))

        # Setting auth.reject on d1 for client1 using hostname
        auth_dict = {'/d1': [hostname_client1.strip()]}
        ret = set_auth_reject(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set auth.reject volume option.")
        g.log.info("Successfully set auth.reject option.")

        # Setting auth.allow on d1 for client2 using hostname
        auth_dict = {'/d1': [hostname_client2.strip()]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set auth.allow volume option")
        g.log.info("Successfully set auth.allow option.")

        # Trying to mount d1 on client1
        self.unauthenticated_mount(mount_obj_client1)

        # Verify whether mount failure on client1 is due to auth error
        self.is_auth_failure(mount_obj_client1.client_system,
                             prev_log_statement)

        # Mounting d1 on client2
        self.authenticated_mount(mount_obj_client2)

        g.log.info("Verification of auth.reject and auth.allow options on "
                   "sub-directory level using client hostname is successful")

        # Unmount d1 from client2
        ret = mount_obj_client2.unmount()
        self.assertTrue(
            ret,
            ("Failed to unmount %s from client %s" %
             (mount_obj_client2.volname, mount_obj_client2.client_system)))
Beispiel #41
0
 def run_async(cmd, hostname, raise_on_error=True):
     """Glusto's asynchronous command runner wrapper.

     Args:
         cmd (str): Shell command to run on the specified hostname.
         hostname (str): hostname where Glusto should run specified command.
         raise_on_error (bool): accepted only for signature parity with
             cmd_run; currently unused because success/failure is only
             known after the caller collects the async result.
     Returns:
         The async process handle from g.run_async; call
         ``.async_communicate()`` on it to obtain (ret, out, err).
     """
     return g.run_async(host=hostname, command=cmd)
def gfind_query(mnode, volname, outfile='', since='', end='', **kwargs):
    """Get a list of changed files based on a specific timestamp.

    Args:
        mnode (str): Node on which cmd has to be executed.
        volname (str): volume name
        outfile (str): This is the incremental list of modified files.

    Kwargs:
        since (int): Timestamp from which the files need to be retrieved.
        end (int): Timestamp until which the files need to be retrieved.

        **kwargs:
            The keys, values in kwargs are:
                - full: (bool)|False
                - tagforfullfind: (str)|None
                - gftype: (str)|None
                - outprefix: (str)|None
                - fieldsep: (str)|None
                - debug: (bool)|False
                - noencode: (bool)|False
                - disablepartial: (bool)|False
                - namespace: (bool)|False

        Where:
        full (bool): If this option is set to True, then the command will be
            run with '--full' option and a full find will be performed.
            If this option is set to False, then the command will be run
            without the '--full' option.
        tagforfullfind (str): When running the command with '--full' option,
            a subset of files can be retrieved according to a tag.
        gftype (str): 'Type' option specifies the finding the list of files
            or directories only. If the value is set to 'f' then only the file
            types will be listed. If the value is set to 'd' then only the
            directory types will be listed. If the value is set to 'both' then
            the files and directories both will be listed.
        outprefix (str): Prefix to the path/name specified in the outfile.
        fieldsep (str): field-separator specifies the character/s that
            glusterfind output uses to separate fields
        debug (bool): If this option is set to True, then
            the command will be run with debug mode. If this option is
            set to False, then the command will not be run with debug mode.
        noencode (bool): If this option is set to True, then it disables
            encoding of file paths. If this option is set to False, then the
            command will run without --no-encode option.
        disablepartial (bool): If this option is set to True, then the
            partial-find feature will be disabled. If this option is set to
            False, then the default value will be respected.
        namespace (bool): If this option is set to True, then the command
            will be run with '--N' option and only namespace changes will
            be listed. If this option is set to False, then the command will
            be run without the '--N' option.

    Returns:
        tuple: Tuple containing three elements (ret, out, err).
            The first element 'ret' is of type 'int' and is the return value
            of command execution.

            The second element 'out' is of type 'str' and is the stdout value
            of the command execution.

            The third element 'err' is of type 'str' and is the stderr value
            of the command execution.

            (-1, None, None): If an invalid option is used in the command.

    Example1:
        gfind_query("abc.com", testvol, outfile=/newoutfile.txt,
                    since=timestamp1, end=timestamp2, full=False)
    Example2:
        gfind_query("abc.com", testvol, outfile=/newoutfile.txt, gftype='f')
            The above example will fail because the
            'full' option is not provided.
    """
    # Extract optional flags. String options are str()-coerced only when the
    # caller actually passed them (matches the original 'in kwargs' checks);
    # boolean options simply default to False.
    outprefix = str(kwargs['outprefix']) if 'outprefix' in kwargs else None
    fieldsep = str(kwargs['fieldsep']) if 'fieldsep' in kwargs else None
    tagforfullfind = (str(kwargs['tagforfullfind'])
                      if 'tagforfullfind' in kwargs else None)
    gftype = str(kwargs['gftype']) if 'gftype' in kwargs else None
    full = bool(kwargs.get('full', False))
    debug = bool(kwargs.get('debug', False))
    noencode = bool(kwargs.get('noencode', False))
    disablepartial = bool(kwargs.get('disablepartial', False))
    namespace = bool(kwargs.get('namespace', False))

    # 'full' is mutually exclusive with the since/end timestamp window.
    if full and since != "" and end != "":
        g.log.error("Invalid command: Glusterfind query accepts either full or"
                    " the since/end timestamps")
        return (-1, None, None)

    if outfile == '':
        g.log.error("Invalid command: Outfile needs to be provided in order"
                    " for the query command to run")
        return (-1, None, None)

    # outfile is guaranteed non-empty here, so it always starts the params.
    params = " %s" % outfile

    if not full:
        if since != '':
            params += " --since-time %s" % since
        if end != '':
            params += " --end-time %s" % end
        if gftype:
            # Without --full, only the 'both' type is accepted.
            if gftype == 'both':
                params += ' --type both'
            else:
                g.log.error("Invalid command: The '--type' option with 'f' or "
                            "'d' as values can only be used along with "
                            "'--full' option")
                return (-1, None, None)

    if not gftype:
        gftype = ''

    if full:
        params += ' --full'

        if gftype in ('f', 'd', 'both', ''):
            if gftype != '':
                params += " --type %s" % gftype
        else:
            g.log.error("Invalid value for the '--type' option of the "
                        "glusterfind query command. Choose among 'f/d/both'.")
            return (-1, None, None)

        if tagforfullfind:
            params += " --tag-for-full-find %s" % tagforfullfind

    if outprefix:
        params += " --output-prefix %s" % outprefix

    if fieldsep:
        params += " --field-separator '%s'" % fieldsep

    if debug:
        params += ' --debug'

    if noencode:
        params += ' --no-encode'

    if disablepartial:
        params += ' --disable-partial'

    if namespace:
        params += ' -N'

    cmd = "glusterfind query %s %s" % (volname, params)
    return g.run(mnode, cmd)
Beispiel #43
0
    def test_snap_delete_existing_scheduler(self):
        # pylint: disable=too-many-statements
        """
        Steps:
        1. enable shared volume
        2. create a volume
        3. initialise snap scheduler on all nodes
        4. enable snap scheduler
        5. check snap scheduler status
        6. perform io on mounts
        7. schedule a job of creating snapshot
           every 30 mins
        8. list jobs created
        9. delete scheduled job
        10. validate io is successful
        11. list job should not list
            any existing snapshot jobs
        """

        # Initialise snap scheduler; retry (up to ~160s) because init can
        # fail transiently while shared storage is settling.
        g.log.info("Initialising snap scheduler on all servers")
        count = 0
        while count < 80:
            ret = scheduler_init(self.servers)
            if ret:
                break
            time.sleep(2)
            count += 1
        self.assertTrue(ret, "Failed to initialise scheduler on all servers")
        g.log.info("Successfully initialised scheduler on all servers")

        # Enable snap scheduler
        g.log.info("Enabling snap scheduler")
        ret, _, _ = scheduler_enable(self.mnode)
        self.assertEqual(ret, 0,
                         "Failed to enable scheduler on node %s" % self.mnode)
        g.log.info("Successfully enabled scheduler on node %s", self.mnode)

        # Check snapshot scheduler status on each server, polling until the
        # reported status becomes 'Enabled'.
        g.log.info("checking status of snapshot scheduler")
        for server in self.servers:
            count = 0
            while count < 40:
                ret, status, _ = scheduler_status(server)
                if status.strip().split(":")[2] == ' Enabled':
                    break
                time.sleep(2)
                count += 2
            # Assert inside the loop so every server is verified (the
            # original only checked the last server's status).
            self.assertEqual(status.strip().split(":")[2], ' Enabled',
                             "Failed to check status of scheduler")
        g.log.info("Successfully checked scheduler status")

        # write files on all mounts
        g.log.info("Starting IO on all mounts...")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_files "
                   "-f 10 --base-file-name file %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # add a job to schedule snapshot every 30 mins
        g.log.info("Starting to add new job")
        self.scheduler = r"*/30 * * * *"
        self.job_name = "Job1"
        ret, _, _ = scheduler_add_jobs(self.mnode, self.job_name,
                                       self.scheduler, self.volname)
        self.assertEqual(ret, 0, "Failed to add job")
        g.log.info("Successfully added Job on volume %s", self.volname)

        # scheduler list
        g.log.info("Starting to list all scheduler jobs")
        ret, _, _ = scheduler_list(self.mnode)
        self.assertEqual(ret, 0, "Failed to list scheduler jobs")
        g.log.info("Successfully listed all jobs")

        # delete scheduled job
        g.log.info("Starting to delete scheduled jobs")
        ret, _, _ = scheduler_delete(self.mnode, self.job_name)
        self.assertEqual(ret, 0, "Failed to delete scheduled job")
        g.log.info("Successfully deleted scheduled job %s", self.job_name)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # scheduler list (no active jobs should be there)
        g.log.info("Starting to list all scheduler jobs")
        ret, out, _ = scheduler_list(self.mnode)
        self.assertEqual(ret, 0, "Failed to list scheduler jobs")
        ret1 = out.strip().split(":")
        self.assertEqual(
            ret1[1], " No snapshots scheduled", "Unexpected:"
            "Failed to delete scheduled job %s" % self.job_name)
        g.log.info("Expected: No snapshots Jobs scheduled")
Beispiel #44
0
def run_fio(servers, directory_to_run):
    """
    Module to run fio test suite on the given servers.

    Args:
        servers (list): servers in which tests to be run.
        directory_to_run (list): directory path where tests will run for
         each server (parallel to `servers`).

    Returns:
        bool: True, if test passes in all servers, False otherwise

    Example:
        run_fio(["abc.com", "def.com"], ["/mnt/test1", "/mnt/test2"])
    """
    g.log.info("Running fio tests on %s", ','.join(servers))
    rt = True

    # Common job-file path, identical on every server.
    job_file = "/tmp/fio_job.ini"

    # Installing fio if not installed
    results = g.run_parallel(servers, "yum list installed fio")
    for index, server in enumerate(servers):
        if results[server][0] != 0:
            ret, _, _ = g.run(server,
                              "yum list installed fio || "
                              "yum -y install fio")
            if ret != 0:
                # Original message said "bonnie"; this function installs fio.
                g.log.error("Failed to install fio on %s", server)
                return False

        # building job file for running fio
        # TODO: parametrizing the fio and to get input values from user
        cmd = ("echo -e '[global]\nrw=randrw\nio_size=1g\nfsync_on_close=1\n"
               "size=4g\nbs=64k\nrwmixread=20\nopenfiles=1\nstartdelay=0\n"
               "ioengine=sync\n[write]\ndirectory=%s\nnrfiles=1\n"
               "filename_format=fio_file.$jobnum.$filenum\nnumjobs=8' "
               "> %s" % (directory_to_run[index], job_file))

        ret, _, _ = g.run(server, cmd)
        if ret != 0:
            g.log.error("Failed to create fio job file")
            rt = False

    # Launch fio on all servers concurrently, then collect the results.
    proc_list = []
    for server in servers:
        fio_command = "fio %s" % (job_file)
        proc = g.run_async(server, fio_command)
        proc_list.append(proc)

    for index, proc in enumerate(proc_list):
        results = proc.async_communicate()
        if results[0] != 0:
            g.log.error("fio test failed on server %s", servers[index])
            rt = False

    # Clean up the data files fio created in each test directory.
    for server, directory in zip(servers, directory_to_run):
        ret, _, _ = g.run(server, "rm -rf %s/fio_file.*" % directory)
        if ret != 0:
            g.log.error("Failed to remove files from %s", server)
            rt = False

    # Remove the job file from every server.
    for server in servers:
        ret, _, _ = g.run(server, "rm -rf %s" % job_file)
        if ret != 0:
            g.log.error("Failed to remove job file from %s", server)
            rt = False

    # Uninstall fio; a failure here is treated as fatal, as before.
    for server in servers:
        ret, _, _ = g.run(server, "yum -y remove fio")
        if ret != 0:
            g.log.error("Failed to remove fio from %s", server)
            return False
    return rt
Beispiel #45
0
def _parse_profile_stats(stats_elem):
    """Parse one <cumulativeStats> or <intervalStats> XML element.

    Helper for get_profile_info(); the two stats sections share an
    identical layout, so they share one parser.

    Args:
        stats_elem (Element): <cumulativeStats> or <intervalStats> node
            from 'gluster volume profile <vol> info --xml' output.

    Returns:
        dict: 'duration'/'totalRead'/'totalWrite' text values, plus
            nested dicts for 'blockStats' (keys numbered from 1) and
            'fopStats' (keys 'fop0', 'fop1', ...). Unknown child tags
            are ignored, matching the original parser's behavior.
    """
    stats = {}
    for el in stats_elem:
        if el.tag in ('duration', 'totalWrite', 'totalRead'):
            stats[el.tag] = el.text
        elif el.tag == 'blockStats':
            stats[el.tag] = {}
            block_dict = stats[el.tag]
            counter = 0
            for block in el:
                counter += 1
                block_dict[block.tag + str(counter)] = {}
                elm_dict = block_dict[block.tag + str(counter)]
                for block_elm in block:
                    elm_dict[block_elm.tag] = block_elm.text
        elif el.tag == 'fopStats':
            stats[el.tag] = {}
            fop_dict = stats[el.tag]
            fop_count = 0
            for fops in el:
                fop_dict['fop' + str(fop_count)] = {}
                fop_param = fop_dict['fop' + str(fop_count)]
                for fop in fops:
                    fop_param[fop.tag] = fop.text
                fop_count += 1
    return stats


def get_profile_info(mnode, volname, options=''):
    """Fetches the volume profile information as displayed in the volume
        profile info.
        Uses xml output of volume profile info and parses it into a dict.

    Args:
        mnode (str): Node on which cmd has to be executed.
        volname (str): Volume for which profile info has to be retrieved.

    Kwargs:
        options (str): Options can be
        [peek|incremental [peek]|cumulative|clear]. If not given the
        function returns the output of gluster volume profile <volname> info.

    Returns:
        NoneType: If there are errors (invalid options, or profiling is
            not running on the volume).
        dict: Volume profile info as a dict of dicts, keyed by volume name,
            with one 'brickN' entry per brick.

    Example:
        get_profile_info(mnode, "testvol")
    """

    if not check_profile_options(options):
        return None

    cmd = "gluster volume profile %s info %s --xml" % (volname, options)
    ret, out, err = g.run(mnode, cmd, log_level='DEBUG')
    if ret:
        g.log.error("Profile not running on volume.")
        return None

    # Iterating through the XML and creating dict.
    # cumulativeStats and intervalStats have the same structure, so both
    # are handled by the shared _parse_profile_stats() helper.
    root = etree.XML(out)
    volprofileinfo = {}
    volume = root.find("volProfile")
    brick_counter = 0
    for elem in volume:
        if elem.tag == "volname":
            volname = elem.text
            volprofileinfo[volname] = {}
        elif elem.tag == "brick":
            brick_counter += 1
            volprofileinfo[volname][elem.tag + str(brick_counter)] = {}
            brick_dict = volprofileinfo[volname][elem.tag + str(brick_counter)]
            for brick_tag in elem:
                if brick_tag.tag in ('cumulativeStats', 'intervalStats'):
                    brick_dict[brick_tag.tag] = _parse_profile_stats(brick_tag)
                else:
                    brick_dict[brick_tag.tag] = brick_tag.text
        else:
            volprofileinfo[elem.tag] = elem.text

    g.log.debug("Volume profile info output: %s" %
                pformat(volprofileinfo, indent=10))
    return volprofileinfo
Beispiel #46
0
    def _testcase(self, number_of_expands=1):
        """Exercise volume expansion and rebalance as a non-root user.

        Steps:
        1. Grant full (777) permissions on the mount point.
        2. As the newly added user, create dirs d0..d9 with 100 files.
        3. Record arequal checksum and permissions of / and subdirs.
        4. Expand the cluster ``number_of_expands`` times, then rebalance.
        5. After rebalance completes, verify the checksum is unchanged,
           permissions survived, and the user can still create/delete
           files and directories.
        """
        # Open up the mount point so the unprivileged user can write
        ret = set_file_permissions(self.clients[0], self.mountpoint, "-R 777")
        self.assertTrue(ret, "Failed to set permissions on the mount point")
        g.log.info("Set full permissions on the mount point")

        # Populate dirs d0..d9 with 100 small files as self.test_user
        create_cmd = (r'su -l %s -c "cd %s;'
                      r'for i in {0..9}; do mkdir d\$i; done;'
                      r'for i in {0..99}; do let x=\$i%%10;'
                      r'dd if=/dev/urandom of=d\$x/f.\$i bs=1024 count=1; done"'
                      % (self.user, self.mountpoint))
        ret, _, _ = g.run(self.client, create_cmd)
        self.assertEqual(ret, 0, ("Failed to create files as %s", self.user))
        g.log.info("IO as %s is successful", self.user)

        # Permissions on / and the subdir must hold before the expand
        self._check_user_permission()

        # Baseline checksum to compare against after rebalance
        self.arequal_checksum_before = collect_mounts_arequal(self.mounts[0])

        self._logged_vol_info()

        # Grow the volume the requested number of times
        for iteration in range(number_of_expands):
            ret = expand_volume(self.mnode, self.volname, self.servers,
                                self.all_servers_info)
            self.assertTrue(
                ret,
                ("Failed to expand iter %d volume %s", iteration,
                 self.volname))

        self._logged_vol_info()
        # Rebalance the expanded volume and block until completion
        self._start_rebalance_and_wait()

        # Data must be intact: checksum before == checksum after
        self._get_arequal_and_check_if_equal_to_before()

        # Permissions on / and the subdir must survive the rebalance
        self._check_user_permission()

        # The user must still be able to create and delete entries
        cleanup_cmd = ('su -l %s -c '
                       '"cd %s; touch file.test;'
                       'find . -mindepth 1 -maxdepth 1 -type d | xargs rm -rf"'
                       % (self.user, self.mountpoint))
        ret, _, _ = g.run(self.client, cleanup_cmd)

        self.assertEqual(ret, 0, ("User %s failed to create files", self.user))
        g.log.info("IO as %s is successful", self.user)
    def test_add_brick_followed_by_remove_brick(self):
        """
        Test case:
        1. Create a volume, start it and mount it to a client.
        2. Start I/O on volume.
        3. Add brick and trigger rebalance, wait for rebalance to complete.
           (The volume which was 1x3 should now be 2x3)
        4. Add brick and trigger rebalance, wait for rebalance to complete.
           (The volume which was 2x3 should now be 3x3)
        5. Remove brick from volume such that it becomes a 2x3.
        6. Remove brick from volume such that it becomes a 1x3.
        7. Wait for I/O to complete and check for any input/output errors in
           both client and rebalance logs.
        """
        # Start I/O on mount point
        self.all_mounts_procs = []
        cmd = ("/usr/bin/env python {} create_deep_dirs_with_files "
               "--dirname-start-num {} --dir-depth 5 --dir-length 5 "
               "--max-num-of-dirs 5 --num-of-files 5 {}".format(
                   self.script_upload_path, 10, self.mountpoint))
        proc = g.run_async(self.first_client, cmd)
        self.all_mounts_procs.append(proc)
        self.is_io_running = True

        # Convert 1x3 to 2x3 and then convert 2x3 to 3x3
        for _ in range(2):
            self._add_brick_and_wait_for_rebalance_to_complete()

        # Convert 3x3 to 2x3 and then convert 2x3 to 1x3
        for _ in range(2):
            self._remove_brick_from_volume()

        # Validate I/O processes running on the nodes
        ret = validate_io_procs(self.all_mounts_procs, [self.mounts[0]])
        self.is_io_running = False
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO on all mounts: Complete")

        # Check for Input/output errors in rebalance logs on every node
        # hosting a brick. A set avoids scanning the same node's log twice
        # when it hosts more than one brick (and fixes the misspelled
        # 'particiapting_nodes' local).
        participating_nodes = {
            brick.split(':')[0]
            for brick in get_all_bricks(self.mnode, self.volname)}

        for server in participating_nodes:
            ret = occurences_of_pattern_in_file(
                server, "Input/output error",
                "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
            self.assertEqual(
                ret, 0, "[Input/output error] present in rebalance log"
                " file")

        # Check for Input/output errors in client logs
        ret = occurences_of_pattern_in_file(
            self.first_client, "Input/output error",
            "/var/log/glusterfs/mnt-{}_{}.log".format(self.volname,
                                                      self.mount_type))
        self.assertEqual(ret, 0,
                         "[Input/output error] present in client log file")
        # Message typo fixed: "errors see in" -> "errors seen in"
        g.log.info("Expanding and shrinking volume successful and no I/O "
                   "errors seen in rebalance and client logs")
Beispiel #48
0
    def test_heal_gfid_1x3(self):
        """
        Description: This test case verifies the gfid self-heal on a 1x3
                 replicate volume.
                 1. file created at mount point
                 2. 2 bricks brought down
                 3. file deleted
                 4. created a new file from the mount point
                 5. all bricks brought online
                 6. check if gfid worked correctly
        """

        g.log.info("setting the quorum type to fixed")
        options = {"cluster.quorum-type": "fixed"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, "unable to set the quorum type to fixed")
        g.log.info("Successfully set the quorum type to fixed")

        g.log.info("creating a file from mount point")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_files "
                   "-f 1 --base-file-name test_file --fixed-file-size 10k %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
        # Validate I/O
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("Successfully created a file from mount point")

        # getting list of all bricks
        all_bricks = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(all_bricks, "unable to get list of bricks")
        g.log.info("bringing down brick1 and brick2")
        ret = bring_bricks_offline(self.volname, all_bricks[:2])
        self.assertTrue(ret, "unable to bring bricks offline")
        g.log.info("Successfully brought the following bricks offline "
                   ": %s", str(all_bricks[:2]))

        g.log.info("deleting the file from mount point")
        # NOTE(review): the stat check below uses 'test_file0.txt', so
        # 'test_file1' looks like a stale name here; 'rm -f' masks the
        # mismatch by returning 0 for a missing file -- confirm which file
        # name create_files actually produces.
        command = "rm -f " + self.mounts[0].mountpoint + "/test_file1"
        ret, _, _ = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, "unable to remove file from mount point")
        g.log.info("Successfully deleted file from mountpoint")

        g.log.info("creating a new file of same name and different size "
                   "from mount point")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_files "
                   "-f 1 --base-file-name test_file --fixed-file-size 1M %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
        # Validate I/O
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("Successfully created a new file of same name "
                   "from mount point")

        g.log.info("bringing bricks 1 and 2 back online")
        ret = bring_bricks_online(self.mnode, self.volname, all_bricks[:2])
        # Bug fix: bring_bricks_online() returns a bool; assertIsNotNone
        # passed even on False, silently accepting a failed bring-online.
        self.assertTrue(ret, "unable to bring bricks online")
        g.log.info("Successfully brought the following bricks online "
                   ": %s", str(all_bricks[:2]))

        g.log.info("checking if stat structure of the file is returned")
        ret = get_file_stat(self.mounts[0].client_system,
                            self.mounts[0].mountpoint + '/test_file0.txt')
        self.assertTrue(ret, "unable to get file stats")
        g.log.info("file stat structure returned successfully")

        g.log.info("checking if the heal has completed")
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, "heal not completed")
        g.log.info("Self heal was completed successfully")

        g.log.info("checking if the areequal checksum of all the bricks in "
                   "the subvol match")
        checksum_list = []
        for brick in all_bricks:
            node, brick_path = brick.split(':')
            command = "arequal-checksum -p " + brick_path + \
                      " -i .glusterfs -i .landfill"
            ret, out, _ = g.run(node, command)
            self.assertEqual(
                ret, 0, "unable to get the arequal checksum "
                "of the brick")
            checksum_list.append(out)
            # checking file size of healed file on each brick to verify
            # correctness of choice for sink and source
            stat_dict = get_file_stat(node, brick_path + '/test_file0.txt')
            self.assertEqual(
                stat_dict['size'], '1048576',
                "file size of healed file is different "
                "than expected")
        flag = all(val == checksum_list[0] for val in checksum_list)
        # Message fix: string concatenation previously produced "isnot same"
        self.assertTrue(flag, "the arequal checksum of all bricks is "
                        "not same")
        g.log.info("the arequal checksum of all the bricks in the subvol "
                   "is same")
Beispiel #49
0
def enable_mounting_volume_over_smb(mnode, volname, smb_users_info):
    """Enable mounting volume over SMB. Set ACL's for non-root users.

    Args:
        mnode (str): Node on which commands are executed.
        volname (str): Name of the volume on which acl's has to be set.
        smb_users_info (dict): Dict containing users info. Each value is a
            dict with a 'password' key and an optional 'acl' key; a missing
            or empty 'acl' defaults to full 'rwx' access. Example:
            smb_users_info = {
                'root': {'password': 'p4ssw0rd',
                         'acl': ''
                         },
                'user1': {'password': 'p4ssw0rd',
                          'acl': ''
                          }
                }
    Returns:
        bool: True on successfully enabling to mount volume using SMB.
            False otherwise. An unmount failure of the temporary mount is
            logged but does not cause a False return (best-effort cleanup).
    """
    g.log.info("Enable mounting volume over SMB")
    # Create a temp mount to provide required permissions to the smb user
    mount = {
        'protocol': 'glusterfs',
        'server': mnode,
        'volname': volname,
        'client': {
            'host': mnode
        },
        'mountpoint': '/tmp/gluster_smb_set_user_permissions_%s' % volname,
        'options': 'acl'
    }
    mount_obj = GlusterMount(mount)
    if not mount_obj.mount():
        g.log.error("Unable to create temporary mount for providing "
                    "required permissions to the smb users")
        return False
    g.log.info("Successfully created temporary mount for providing "
               "required permissions to the smb users")

    # Provide required permissions to each non-root smb user; a missing or
    # empty 'acl' entry falls back to "rwx" (same semantics as before).
    for smb_user in smb_users_info:
        if smb_user == 'root':
            continue
        acl = smb_users_info[smb_user].get('acl') or "rwx"
        cmd = ("setfacl -m user:%s:%s %s" %
               (smb_user, acl, mount_obj.mountpoint))
        ret, _, _ = g.run(mnode, cmd)
        if ret != 0:
            g.log.error(
                "Unable to provide required permissions to the "
                "smb user %s ", smb_user)
            # Bug fix: previously the temporary mount was leaked on this
            # error path; attempt cleanup before failing.
            mount_obj.unmount()
            return False
        g.log.info(
            "Successfully provided required permissions to the "
            "smb user %s ", smb_user)

    # Verify SMB/CIFS share  can be accessed by the user

    # Unmount the temp mount created. Bug fix: the success message was
    # previously logged unconditionally, even when unmount failed.
    if mount_obj.unmount():
        g.log.info("Successfully unmounted the temp mount")
    else:
        g.log.error("Unable to unmount the temp mount")

    return True
Beispiel #50
0
    def test_remove_brick_no_commit_followed_by_rebalance(self):
        """Verify no data loss when an uncommitted remove-brick is stopped
        and the volume is subsequently expanded and rebalanced.

        Steps:
        1) Create a volume and mount it using FUSE.
        2) Create files and dirs on the mount-point.
        3) Calculate the arequal-checksum on the mount-point.
        4) Start a remove-brick operation on the volume.
        5) While migration is in progress, stop the remove-brick operation.
        6) Add bricks to the volume and trigger rebalance.
        7) Wait for rebalance to complete.
        8) Recalculate the arequal-checksum and compare with the original.
        """
        # Kick off IO on the first mount
        mnt = self.mounts[0].mountpoint
        io_cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                  "--dir-length 10 --dir-depth 2 --max-num-of-dirs 1 "
                  "--num-of-files 50 --file-type empty-file %s" %
                  (self.script_upload_path, mnt))
        io_proc = g.run_async(self.mounts[0].client_system,
                              io_cmd,
                              user=self.mounts[0].user)
        g.log.info("IO on %s:%s is started successfully",
                   self.mounts[0].client_system, mnt)

        # Wait for the IO to finish cleanly
        self.assertTrue(validate_io_procs([io_proc], self.mounts[0]),
                        "IO failed on some of the clients")

        # Checksum of the data set prior to shrinking the volume
        ret, checksum_before = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Pick the bricks of subvol 1 for removal
        bricks_to_remove = form_bricks_list_to_remove_brick(self.mnode,
                                                            self.volname,
                                                            subvol_name=1)
        self.assertIsNotNone(bricks_to_remove, ("Volume %s: Failed to "
                                                "form bricks list for "
                                                "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Begin shrinking the volume
        ret, _, _ = remove_brick(self.mnode, self.volname, bricks_to_remove,
                                 "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ", self.volname))
        g.log.info("Volume %s shrink started ", self.volname)

        # Query the remove-brick progress
        ret, out, _ = remove_brick(self.mnode, self.volname, bricks_to_remove,
                                   "status")
        self.assertEqual(ret, 0,
                         ("Remove-brick status failed on %s ", self.volname))

        # Abort the shrink only while migration is still underway
        if 'in progress' in out:
            g.log.info("Stop removing bricks from volume")
            ret, out, _ = remove_brick(self.mnode, self.volname,
                                       bricks_to_remove, "stop")
            self.assertEqual(ret, 0, "Failed to stop remove-brick process")
            g.log.info("Stopped remove-brick process successfully")
        else:
            g.log.error("Migration for remove-brick is complete")

        # Give any lingering remove-brick process time to die down
        sleep(30)

        # Grow the volume again
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Add-brick failed", self.volname))
        g.log.info("Volume %s: Add-brick successful", self.volname)

        # Kick off rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Volume %s: Failed to start rebalance", self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Block until rebalance finishes
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance has not completed")
        g.log.info("Rebalance has completed successfully")

        # Checksum of the data set after add-brick + rebalance
        ret, checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Both checksums must match for there to be no data loss
        self.assertEqual(set(checksum_before), set(checksum_after),
                         ("There is data loss"))
        g.log.info("The checksum before and after rebalance is same."
                   " There is no data loss.")
Beispiel #51
0
 def tearDownClass(cls):
     """unittest tearDownClass override.

     Removes the scratch files created on the primary and secondary
     hosts during the tests of this class.
     """
     # Bug fix: the bare Python-2 print statement is a SyntaxError under
     # Python 3; the single-argument parenthesized form behaves
     # identically on both interpreters.
     print("Tearing Down Class: %s" % cls.__name__)
     g.run(cls.primary_host, 'rm -f /tmp/railetc')
     g.run(cls.primary_host, 'rm -f /tmp/upload_test_file')
     g.run(cls.hosts[1], 'rm -f /tmp/transfer_test_file')
    def test_accessing_file_when_dht_layout_is_stale(self):
        '''
        Description : Checks if a file can be opened and accessed if the dht
                      layout has become stale.

        Steps:
        1. Create, start and mount a volume consisting 2 subvols on 2 clients
        2. Create a dir `dir` and file `dir/file` from client0
        3. Take note of layouts of `brick1`/dir and `brick2`/dir of the volume
        4. Validate for success lookup from only one brick path
        5. Re-assign layouts ie., brick1/dir to brick2/dir and vice-versa
        6. Remove `dir/file` from client0 and recreate same file from client0
           and client1
        7. Validate for success lookup from only one brick path (as layout is
           changed file creation path will be changed)
        8. Validate checksum is matched from both the clients
        '''

        # Will be used in _get_brick_node_and_path
        self.dir_path = '/dir'

        # Will be used in argument to _assert_file_lookup
        file_name = '/file'

        dir_path = self.mounts[0].mountpoint + self.dir_path
        file_path = dir_path + file_name

        client0, client1 = self.clients[0], self.clients[1]
        # xattr that holds the dht layout of a directory on a brick
        fattr = 'trusted.glusterfs.dht'
        # Writes 1K of random printable data to file_path via the shell
        io_cmd = ('cat /dev/urandom | tr -dc [:space:][:print:] | '
                  'head -c 1K > {}'.format(file_path))

        # Create a dir from client0
        ret = mkdir(self.clients[0], dir_path)
        self.assertTrue(ret, 'Unable to create a directory from mount point')

        # Touch a file with data from client0
        ret, _, _ = g.run(client0, io_cmd)
        self.assertEqual(ret, 0, 'Failed to create a file on mount')

        # Yields `node` and `brick-path` from first brick of each subvol
        # NOTE: consumption order matters -- the first next() must be
        # subvol1 and the second subvol2; the layout-swap below relies on it
        gen = self._get_brick_node_and_path()

        # Take note of newly created directory's layout from org_subvol1
        node1, fqpath1 = next(gen)
        layout1 = get_fattr(node1, fqpath1, fattr)
        self.assertIsNotNone(layout1,
                             '{} is not present on {}'.format(fattr, fqpath1))

        # Lookup on file from node1 should fail as `dir/file` will always get
        # hashed to node2 in a 2-brick distribute volume by default
        # NOTE(review): assumes the default hash of 'file' lands on subvol2
        # for a fresh 2-subvol layout -- holds for this fixture
        self._assert_file_lookup(node1,
                                 fqpath1 + file_name,
                                 when='before',
                                 result=False)

        # Take note of newly created directory's layout from org_subvol2
        node2, fqpath2 = next(gen)
        layout2 = get_fattr(node2, fqpath2, fattr)
        self.assertIsNotNone(layout2,
                             '{} is not present on {}'.format(fattr, fqpath2))

        # Lookup on file from node2 should pass
        self._assert_file_lookup(node2,
                                 fqpath2 + file_name,
                                 when='before',
                                 result=True)

        # Set org_subvol2 directory layout to org_subvol1 and vice-versa,
        # making the cached layouts on the clients stale
        for node, fqpath, layout, vol in ((node1, fqpath1, layout2, (2, 1)),
                                          (node2, fqpath2, layout1, (1, 2))):
            ret = set_fattr(node, fqpath, fattr, layout)
            self.assertTrue(
                ret, 'Failed to set layout of org_subvol{} on '
                'brick {} of org_subvol{}'.format(vol[0], fqpath, vol[1]))

        # Remove file after layout change from client0
        cmd = 'rm -f {}'.format(file_path)
        ret, _, _ = g.run(client0, cmd)
        self.assertEqual(ret, 0, 'Failed to delete file after layout change')

        # Create file with same name as above after layout change from client0
        # and client1
        for client in (client0, client1):
            ret, _, _ = g.run(client, io_cmd)
            self.assertEqual(
                ret, 0, 'Failed to create file from '
                '{} after layout change'.format(client))

        # After layout change lookup on file from node1 should pass
        self._assert_file_lookup(node1,
                                 fqpath1 + file_name,
                                 when='after',
                                 result=True)

        # After layout change lookup on file from node2 should fail
        self._assert_file_lookup(node2,
                                 fqpath2 + file_name,
                                 when='after',
                                 result=False)

        # Take note of checksum from client0 and client1
        checksums = [None] * 2
        for index, mount in enumerate(self.mounts):
            ret, checksums[index] = collect_mounts_arequal(mount, dir_path)
            self.assertTrue(
                ret, 'Failed to get arequal on client {}'.format(
                    mount.client_system))

        # Validate no checksum mismatch
        self.assertEqual(checksums[0], checksums[1],
                         'Checksum mismatch between client0 and client1')

        g.log.info('Pass: Test accessing file on stale layout is complete.')
    def test_bitd_scrubd_snapd_after_volume_reset(self):
        # pylint: disable=too-many-statements
        """
        -> Create volume
        -> Enable BitD, Scrub and Uss on volume
        -> Verify the BitD, Scrub and Uss daemons are running on every node
        -> Reset the volume
        -> Verify the daemons (BitD, Scrub & Uss) are running or not
        -> Enable Uss on same volume
        -> Reset the volume with force
        -> Verify all the daemons (BitD, Scrub & Uss) are running or not

        Note: backslash line-continuations inside string literals previously
        embedded long runs of indentation whitespace into the log and assert
        messages; the messages below are rebuilt with implicit string
        concatenation instead.
        """

        # enable bitrot and scrub on volume
        g.log.info("Enabling bitrot")
        ret, _, _ = enable_bitrot(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to enable bitrot on volume: %s" %
                         self.volname)
        g.log.info("Bitd and scrub daemons enabled successfully on volume :%s",
                   self.volname)

        # enable uss on volume
        g.log.info("Enabling snapshot(uss)")
        ret, _, _ = enable_uss(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to enable uss on volume: %s" %
                         self.volname)
        g.log.info("uss enabled successfully on volume :%s", self.volname)

        # Checks bitd, snapd, scrub daemons running or not on every node
        # that hosts a brick of the volume
        g.log.info("checking snapshot, scrub and bitrot "
                   "daemons running or not")
        node_list = []
        for brick in get_all_bricks(self.mnode, self.volname):
            node, _ = brick.split(':')
            node_list.append(node)
        for mnode in node_list:
            ret = is_bitd_running(mnode, self.volname)
            self.assertTrue(ret, "Bitrot Daemon not running on %s server:"
                            % mnode)
            ret = is_scrub_process_running(mnode, self.volname)
            self.assertTrue(ret, "Scrub Daemon not running on %s server:"
                            % mnode)
            ret = is_snapd_running(mnode, self.volname)
            self.assertTrue(ret, "Snap Daemon not running %s server:" % mnode)
        g.log.info("bitd, scrub and snapd running successfully on volume :%s",
                   self.volname)

        # command for volume reset
        g.log.info("started resetting volume")
        cmd = "gluster volume reset " + self.volname
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "volume reset failed for : %s" % self.volname)
        g.log.info("Volume reset successfully :%s", self.volname)

        # After volume reset snap daemon will not be running,
        # bitd and scrub daemons will be in running state.
        g.log.info("checking snapshot, scrub and bitrot daemons "
                   "running or not after volume reset")
        for mnode in node_list:
            ret = is_bitd_running(mnode, self.volname)
            self.assertTrue(ret, "Bitrot Daemon not running on %s server:"
                            % mnode)
            ret = is_scrub_process_running(mnode, self.volname)
            self.assertTrue(ret, "Scrub Daemon not running on %s server:"
                            % mnode)
            ret = is_snapd_running(mnode, self.volname)
            self.assertFalse(ret, "Snap Daemon should not be running on %s "
                             "server after volume reset:" % mnode)
        g.log.info("bitd and scrub daemons are running after volume reset "
                   "snapd is not running as expected on volume :%s",
                   self.volname)

        # enable uss on volume again so we can verify it dies on a
        # forced reset
        g.log.info("Enabling snapshot(uss)")
        ret, _, _ = enable_uss(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to enable uss on volume: %s" %
                         self.volname)
        g.log.info("uss enabled successfully on volume :%s", self.volname)

        # command for volume reset with force
        g.log.info("started resetting volume with force option")
        cmd = "gluster volume reset " + self.volname + " force"
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0,
                         "volume reset failed for : %s" % self.volname)
        g.log.info("Volume reset successfully with force option :%s",
                   self.volname)

        # After volume reset with force bitd, snapd and scrub daemons
        # should all be stopped
        g.log.info("checking snapshot, scrub and bitrot daemons "
                   "running or not after volume reset with force")
        for mnode in node_list:
            ret = is_bitd_running(mnode, self.volname)
            self.assertFalse(ret, "Bitrot Daemon should not be running on %s "
                             "server after volume reset with force:" % mnode)
            ret = is_scrub_process_running(mnode, self.volname)
            self.assertFalse(ret, "Scrub Daemon should not be running on %s "
                             "server after volume reset with force:" % mnode)
            ret = is_snapd_running(mnode, self.volname)
            self.assertFalse(ret, "Snap Daemon should not be running on %s "
                             "server after volume reset force:" % mnode)
        g.log.info("After volume reset bitd, scrub and snapd are not running "
                   "after volume reset with force on volume :%s", self.volname)
Beispiel #54
0
    def test_metadata_self_heal(self):
        """
        Test MetaData Self-Heal (heal command)

        Description:
        - set the volume option
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        - create IO
        - set the volume option
        "self-heal-daemon": "off"
        - bring down all bricks processes from selected set
        - Change the permissions, ownership and the group
        of the files under "test_meta_data_self_heal" folder
        - get arequal before getting bricks online
        - bring bricks online
        - set the volume option
        "self-heal-daemon": "on"
        - check daemons and start healing
        - check is heal is completed
        - check for split-brain
        - get arequal after getting bricks online and compare with
        arequal before getting bricks online
        - check group and user are 'qa'
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Disable all client-side self-heal so that only the shd (enabled
        # later) performs the metadata heal we want to exercise.
        g.log.info('Setting options...')
        options = {"metadata-self-heal": "off",
                   "entry-self-heal": "off",
                   "data-self-heal": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Options "
                   "'metadata-self-heal', "
                   "'entry-self-heal', "
                   "'data-self-heal', "
                   "are set to 'off' successfully")

        # Creating files on client side
        all_mounts_procs = []
        test_meta_data_self_heal_folder = 'test_meta_data_self_heal'
        g.log.info("Generating data for %s:%s",
                   self.mounts[0].client_system, self.mounts[0].mountpoint)

        # Create 50 small files whose metadata will later be modified
        # while a subset of bricks is down.
        g.log.info('Creating files...')
        command = ("cd %s/ ; "
                   "mkdir %s ;"
                   "cd %s/ ;"
                   "for i in `seq 1 50` ; "
                   "do dd if=/dev/urandom of=test.$i bs=10k count=1 ; "
                   "done ;"
                   % (self.mounts[0].mountpoint,
                      test_meta_data_self_heal_folder,
                      test_meta_data_self_heal_folder))

        proc = g.run_async(self.mounts[0].client_system, command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # wait for io to complete
        self.assertTrue(
            wait_for_io_to_complete(all_mounts_procs, self.mounts),
            "Io failed to complete on some of the clients")

        # Turn the self-heal daemon off so no healing happens while the
        # bricks are brought down and the metadata is changed.
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'self-heal-daemon' is set to 'off' successfully")

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(filter(None, (
            bricks_to_bring_offline_dict['hot_tier_bricks'] +
            bricks_to_bring_offline_dict['cold_tier_bricks'] +
            bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
                        bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not offline'
                        % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Changing the permissions, ownership and the group
        # of the files under "test_meta_data_self_heal" folder
        g.log.info("Modifying data for %s:%s",
                   self.mounts[0].client_system, self.mounts[0].mountpoint)

        # Change permissions to 444
        g.log.info('Changing permissions...')
        command = ("cd %s/%s/ ; "
                   "chmod -R 444 *"
                   % (self.mounts[0].mountpoint,
                      test_meta_data_self_heal_folder))
        ret, out, err = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, err)
        g.log.info('Permissions are changed successfully')

        # Change the ownership to qa
        g.log.info('Changing the ownership...')
        command = ("cd %s/%s/ ; "
                   "chown -R qa *"
                   % (self.mounts[0].mountpoint,
                      test_meta_data_self_heal_folder))
        ret, out, err = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, err)
        g.log.info('Ownership is changed successfully')

        # Change the group to qa
        g.log.info('Changing the group...')
        command = ("cd %s/%s/ ; "
                   "chgrp -R qa *"
                   % (self.mounts[0].mountpoint,
                      test_meta_data_self_heal_folder))
        ret, out, err = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, err)
        g.log.info('Group is changed successfully')

        # Get arequal before getting bricks online
        g.log.info('Getting arequal before getting bricks online...')
        ret, result_before_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks online '
                   'is successful')

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s online' %
                        bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Re-enable the self-heal daemon so healing can be triggered.
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume process %s not online "
                              "despite waiting for 5 minutes", self.volname))
        g.log.info("Successful in waiting for volume %s processes to be "
                   "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s : All process are not online"
                              % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after getting bricks online
        g.log.info('Getting arequal after getting bricks online...')
        ret, result_after_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks online '
                   'is successful')

        # Checking arequals before bringing bricks online
        # and after bringing bricks online.
        # NOTE: assertItemsEqual was removed in Python 3; compare the
        # sorted sequences instead to keep the same unordered semantics.
        self.assertEqual(sorted(result_before_online),
                         sorted(result_after_online),
                         'Checksums are not equal')
        g.log.info('Checksums before bringing bricks online '
                   'and after bringing bricks online are equal')

        # Adding servers and client in single dict to check permissions
        nodes_to_check = {}
        all_bricks = get_all_bricks(self.mnode, self.volname)
        for brick in all_bricks:
            node, brick_path = brick.split(':')
            nodes_to_check[node] = brick_path
        nodes_to_check[self.mounts[0].client_system] = \
            self.mounts[0].mountpoint

        # Checking for user and group
        for node in nodes_to_check:
            # Get file list
            command = ("cd %s/%s/ ; "
                       "ls"
                       % (nodes_to_check[node],
                          test_meta_data_self_heal_folder))
            ret, out, err = g.run(node, command)
            # Fail fast if the listing itself failed, otherwise 'out'
            # could silently be empty and skip all checks below.
            self.assertEqual(ret, 0, err)
            file_list = out.split()

            for file_name in file_list:
                file_to_check = '%s/%s/%s' % (nodes_to_check[node],
                                              test_meta_data_self_heal_folder,
                                              file_name)

                g.log.info('Checking for permissions, user and group for %s',
                           file_name)

                # Check for permissions
                cmd = ("stat -c '%a %n' {} | awk '{{print $1}}'"
                       .format(file_to_check))
                ret, permissions, _ = g.run(node, cmd)
                self.assertEqual(permissions.split('\n')[0], '444',
                                 'Permissions %s is not equal to 444'
                                 % permissions)
                g.log.info("Permissions are '444' for %s", file_name)

                # Check for user
                cmd = ("ls -ld {} | awk '{{print $3}}'"
                       .format(file_to_check))
                ret, username, _ = g.run(node, cmd)
                self.assertEqual(username.split('\n')[0],
                                 'qa', 'User %s is not equal qa'
                                 % username)
                g.log.info("User is 'qa' for %s", file_name)

                # Check for group
                cmd = ("ls -ld {} | awk '{{print $4}}'"
                       .format(file_to_check))
                ret, groupname, _ = g.run(node, cmd)
                self.assertEqual(groupname.split('\n')[0],
                                 'qa', 'Group %s is not equal qa'
                                 % groupname)
                g.log.info("Group is 'qa' for %s", file_name)
Beispiel #55
0
def upload_scripts(list_of_nodes,
                   list_of_scripts_abs_path,
                   upload_dir="/usr/share/glustolibs/io/scripts/",
                   user=None):
    """Upload specified scripts to all the nodes.

    Args:
        list_of_nodes (list): Nodes on which scripts have to be uploaded.
        list_of_scripts_abs_path (list): List of absolute path of all
            scripts that are to be uploaded from local node.
        upload_dir (optional[str]): Name of the dir under which
            scripts will be uploaded on remote node.
        user (optional[str]): The user to use for the remote connection.

    Returns:
        bool: True if uploading scripts is successful on all nodes.
            False otherwise.
    """
    # Accept single values for convenience and normalise to lists.
    if not isinstance(list_of_nodes, list):
        list_of_nodes = [list_of_nodes]

    if not isinstance(list_of_scripts_abs_path, list):
        list_of_scripts_abs_path = list_of_scripts_abs_path.split(" ")

    g.log.info("Scripts to upload: %s", list_of_scripts_abs_path)
    g.log.info("Script upload dir: %s", upload_dir)

    # Create upload dir on each node
    if not create_dirs(list_of_nodes, upload_dir):
        return False

    # Upload scripts
    for script_local_abs_path in list_of_scripts_abs_path:
        if not os.path.exists(script_local_abs_path):
            g.log.error("Script: %s doesn't exist", script_local_abs_path)
            g.log.error("Failed to upload scripts")
            return False
        # The remote destination is the same for every node; compute it
        # once per script instead of once per node.
        script_name = os.path.basename(script_local_abs_path)
        script_upload_path = os.path.join(upload_dir, script_name)
        for node in list_of_nodes:
            g.upload(node, script_local_abs_path, script_upload_path, user)

    # Recursively provide execute permissions to all scripts
    for node in list_of_nodes:
        ret, _, _ = g.run(node, "chmod -R +x %s" % upload_dir)
        if ret != 0:
            g.log.error("Unable to provide execute permissions to upload dir "
                        "'%s' on %s", upload_dir, node)
            return False
        g.log.info("Successfully provided execute permissions to upload "
                   "dir '%s' on %s", upload_dir, node)

        # Directory listing is informational only; a failure here does
        # not fail the upload.
        ret, out, err = g.run(node, "ls -l %s" % upload_dir)
        if ret != 0:
            g.log.error("Failed to list the dir: %s on node: %s - %s",
                        upload_dir, node, err)
        else:
            g.log.info("Listing dir: %s on node: %s - \n%s",
                       upload_dir, node, out)

    return True
    def test_heal_info_should_have_fixed_fields(self):
        """
        - Create IO
        - While IO is creating - bring down a couple of bricks
        - Wait for IO to complete
        - Bring up the down bricks
        - Wait for heal to complete
        - Check for fields 'Brick', 'Status', 'Number of entries' in heal info
        """
        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create files
            g.log.info('Creating files...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -f 50 %s" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(
            filter(None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                          bricks_to_bring_offline_dict['cold_tier_bricks'] +
                          bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring brick offline while the IO above is still running
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get heal info
        g.log.info('Getting heal info...')
        heal_info_dicts = get_heal_info_summary(self.mnode, self.volname)
        # Validate the value that was actually fetched; the previous
        # version re-tested a stale 'ret' from an earlier call here.
        self.assertIsNotNone(heal_info_dicts, 'Failed to get heal info')
        g.log.info(heal_info_dicts)

        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, 'Brick list is None')

        # Check all fields in heal info dict
        g.log.info('Checking for all the fields in heal info...')
        for brick in bricks_list:
            g.log.info('Checking fields for %s', brick)
            self.assertEqual(heal_info_dicts[brick]['status'], 'Connected',
                             'Status is not Connected for brick %s' % brick)
            self.assertEqual(heal_info_dicts[brick]['numberOfEntries'], '0',
                             'numberOfEntries is not 0 for brick %s' % brick)

        g.log.info('Successfully checked for all the fields in heal info')
Beispiel #57
0
    def test_snap_delete_existing_scheduler(self):
        # pylint: disable=too-many-statements
        """
        Description:

        Validating snapshot scheduler behavior when existing schedule
        is deleted.

        Steps:
        * Enable shared volume
        * Create a volume
        * Initialise snap_scheduler on all nodes
        * Enable snap_scheduler
        * Validate snap_scheduler status
        * Perform IO on mounts
        * Schedule a job of creating snapshot every 30 mins
        * Perform snap_scheduler list
        * Delete scheduled job
        * Validate IO is successful
        * Perform snap_scheduler list
        """

        # Initialise snap scheduler, retrying for up to ~160s since the
        # shared storage may take a while to become available.
        g.log.info("Initialising snap_scheduler on all servers")
        count = 0
        while count < 80:
            ret = scheduler_init(self.servers)
            if ret:
                break
            sleep(2)
            count += 1
        self.assertTrue(ret, "Failed to initialise scheduler on all servers")
        g.log.info("Successfully initialised scheduler on all servers")

        # Enable snap scheduler
        g.log.info("Enabling snap_scheduler")
        ret, _, _ = scheduler_enable(self.mnode)
        self.assertEqual(ret, 0, "Failed to enable scheduler on node %s"
                         % self.mnode)
        g.log.info("Successfully enabled scheduler on node %s", self.mnode)

        # Validate snapshot scheduler status on every server, polling
        # until the status reports 'Enabled' or the timeout expires.
        g.log.info("Validating status of snap_scheduler")
        for server in self.servers:
            count = 0
            while count < 40:
                ret, status, _ = scheduler_status(server)
                if status.strip().split(":")[2] == ' Enabled':
                    break
                sleep(2)
                count += 2
            # Assert inside the loop so each server is validated; the
            # previous version only checked the last server's status.
            self.assertEqual(status.strip().split(":")[2], ' Enabled',
                             "Failed to validate status of scheduler")
        g.log.info("Successfully validated scheduler status")

        # Write files on all mounts
        g.log.info("Starting IO on all mounts...")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_files "
                   "-f 10 --base-file-name file %s" % (
                       self.script_upload_path,
                       mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Add a job to schedule snapshot every 30 mins
        g.log.info("Starting to add new job")
        self.scheduler = r"*/30 * * * *"
        self.job_name = "Job1"
        ret, _, _ = scheduler_add_jobs(self.mnode, self.job_name,
                                       self.scheduler, self.volname)
        self.assertEqual(ret, 0, "Failed to add job")
        g.log.info("Successfully added Job on volume %s", self.volname)

        # Perform snap_scheduler list
        g.log.info("Starting to list all scheduler jobs")
        ret, _, _ = scheduler_list(self.mnode)
        self.assertEqual(ret, 0, "Failed to list scheduler jobs")
        g.log.info("Successfully listed all jobs")

        # Delete scheduled job
        g.log.info("Starting to delete scheduled jobs")
        ret, _, _ = scheduler_delete(self.mnode, self.job_name)
        self.assertEqual(ret, 0, "Failed to delete scheduled job")
        g.log.info("Successfully deleted scheduled job %s", self.job_name)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )

        # Perform snap_scheduler list (no active jobs should be present)
        g.log.info("Starting to list all scheduler jobs")
        ret, out, _ = scheduler_list(self.mnode)
        self.assertEqual(ret, 0, "Failed to list scheduler jobs")
        ret1 = out.strip().split(":")
        self.assertEqual(ret1[1], " No snapshots scheduled", "Unexpected: "
                         "Jobs are getting listed even after being deleted")
        g.log.info("Expected: No snapshots Jobs scheduled")
Beispiel #58
0
def cleanup_mounts(mounts):
    """Removes all the data from all the mountpoints

    Args:
        mounts (list): List of all GlusterMount objs.

    Returns:
        bool: True if cleanup is successful on all mounts. False otherwise.
    """
    # Accept a single mount object for convenience.
    if isinstance(mounts, GlusterMount):
        mounts = [mounts]

    g.log.info("Start cleanup mounts")
    all_mounts_procs = []
    valid_mounts = []
    for mount_obj in mounts:
        g.log.info("Cleaning up data from %s:%s", mount_obj.client_system,
                   mount_obj.mountpoint)
        # Guard against wiping '/' if a mountpoint is unset or resolves
        # to the filesystem root.  NOTE: value comparison must use '=='
        # ('is' on a string literal is identity, not equality).
        if (not mount_obj.mountpoint or
                (os.path.realpath(os.path.abspath(mount_obj.mountpoint))
                 == '/')):
            g.log.error("%s on %s is not a valid mount point",
                        mount_obj.mountpoint, mount_obj.client_system)
            continue
        cmd = "rm -rf %s/*" % (mount_obj.mountpoint)
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)
        valid_mounts.append(mount_obj)
    g.log.info("rm -rf on all clients is complete. Validating "
               "deletion now...")

    # Get cleanup status
    _rc_rmdir = True
    for i, proc in enumerate(all_mounts_procs):
        ret, out, err = proc.async_communicate()
        if ret != 0 or out or err:
            g.log.error("Deleting files/dirs Failed on %s:%s",
                        valid_mounts[i].client_system,
                        valid_mounts[i].mountpoint)
            _rc_rmdir = False
        else:
            g.log.info("Deleting files/dirs is successful on %s:%s",
                       valid_mounts[i].client_system,
                       valid_mounts[i].mountpoint)
    if _rc_rmdir:
        g.log.info("Successfully deleted files/dirs from all mounts")
    else:
        g.log.error("Deleting files/dirs failed on some of the mounts")

    # Check if mount points are empty (grep returning non-zero means
    # nothing remains apart from the ignored directories).
    ignore_dirs_list = [".trashcan"]
    ignore_dirs = "\|".join(ignore_dirs_list)
    all_mounts_procs = []
    for mount_obj in mounts:
        cmd = ("find %s -mindepth 1 | grep -ve '%s'" %
               (mount_obj.mountpoint, ignore_dirs))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        all_mounts_procs.append(proc)

    # Get cleanup status
    _rc_lookup = True
    for i, proc in enumerate(all_mounts_procs):
        ret, out, err = proc.async_communicate()
        if ret == 0:
            g.log.error("Mount %s on %s is still having entries:\n%s",
                        mounts[i].mountpoint, mounts[i].client_system, out)
            _rc_lookup = False
        else:
            g.log.info("Mount %s on %s is cleaned up\n%s",
                       mounts[i].mountpoint, mounts[i].client_system, out)
    if _rc_lookup:
        g.log.info("All the mounts are successfully cleaned up")
    else:
        g.log.error("Failed to cleanup all mounts")

    # List mounts entries
    g.log.info("Listing mounts entries:")
    list_all_files_and_dirs_mounts(mounts)

    return _rc_lookup
    def test_volume_set_ops_sub_dirs_mounted(self):
        """
        Check volume start/volume stop/volume reset operations while sub-dirs
        are mounted

        Steps:
        1. Create two sub-directories on mounted volume.
        2. Unmount volume from clients.
        3. Mount each sub-directory to two different clients.
        4. Perform IO on mounts.
        5. Perform volume stop operation.
        6. Perform volume start operation.
        7. Perform volume reset operation.
        """
        # Creating two sub directories on mounted volume
        ret = mkdir(self.mounts[0].client_system,
                    "%s/d1" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'd1' in volume %s "
                  "from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        ret = mkdir(self.mounts[0].client_system,
                    "%s/d2" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'd2' in volume %s "
                  "from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))

        # Unmounting volumes
        ret = self.unmount_volume(self.mounts)
        self.assertTrue(ret, "Failed to un mount one or more volumes")
        g.log.info("Successfully un mounted all volumes")

        # Mounting one sub directory on each client.
        # Deep copies are used so the original mount objects stay intact;
        # the copies get their volname rewritten to "<vol>/<subdir>".
        # NOTE(review): self.subdir_mounts is stored on the instance —
        # presumably so teardown can unmount these sub-dirs; confirm.
        self.subdir_mounts = [
            copy.deepcopy(self.mounts[0]),
            copy.deepcopy(self.mounts[1])
        ]
        self.subdir_mounts[0].volname = "%s/d1" % self.volname
        self.subdir_mounts[1].volname = "%s/d2" % self.volname
        for mount_obj in self.subdir_mounts:
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Failed to mount sub directory %s on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Successfully mounted sub directory %s on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully mounted sub directories to clients.")

        # Start IO on all mounts.
        # 'count' offsets the starting dir number per mount so the two
        # clients write into disjoint directory trees.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.subdir_mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.subdir_mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.subdir_mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Stop volume while the sub-dir mounts are still in place
        g.log.info("Stopping volume: %s", self.volname)
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop volume: %s" % self.volname)

        # Start volume
        g.log.info("Starting volume again: %s", self.volname)
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume: %s" % self.volname)

        # Reset volume
        g.log.info("Resetting volume: %s", self.volname)
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to reset volume: %s" % self.volname)
Beispiel #60
0
def run_bonnie(servers, directory_to_run, username="root"):
    """
    Module to run bonnie test suite on the given servers.

    Args:
        servers (list): servers in which tests to be run.
        directory_to_run (list): directory path where tests will run for
         each server.

    Kwargs:
        username (str): username. Defaults to root.

    Returns:
        bool: True, if test passes in all servers, False otherwise

    Example:
        run_bonnie(["abc.com", "def.com"], ["/mnt/test1", "/mnt/test2"])
    """

    g.log.info("Running bonnie tests on %s", ','.join(servers))
    rt = True
    options_for_each_servers = []

    # Install bonnie test suite if not installed
    results = g.run_parallel(servers, "yum list installed bonnie++")
    for index, server in enumerate(servers):
        if results[server][0] != 0:
            ret, _, _ = g.run(server,
                              "yum list installed bonnie++ || "
                              "yum -y install bonnie++")
            if ret != 0:
                g.log.error("Failed to install bonnie on %s", server)
                return False

        # Building options for bonnie tests: always run as `username` in
        # the server's dedicated directory; add the big-file workload only
        # when the host has enough RAM (>= 8 GiB per `free -g`).
        options_list = ["-d %s -u %s" % (directory_to_run[index], username)]
        freemem_command = "free -g | grep Mem: | awk '{ print $2 }'"
        ret, out, _ = g.run(server, freemem_command)
        if ret != 0 or not out.strip():
            # Guard: int('') / int on garbage would raise ValueError.
            g.log.error("Failed to get free memory on %s", server)
            return False
        memory = int(out)
        g.log.info("Memory = %i", memory)
        if memory >= 8:
            options_list.append("-r 16G -s 16G -n 0 -m TEST -f -b")

        options_for_each_servers.append(" ".join(options_list))

    # Kick off bonnie asynchronously on every server so they run in
    # parallel, then collect the results.
    proc_list = []
    for index, server in enumerate(servers):
        bonnie_command = "bonnie++ %s" % (options_for_each_servers[index])
        proc = g.run_async(server, bonnie_command)
        proc_list.append(proc)

    for index, proc in enumerate(proc_list):
        result = proc.async_communicate()
        if result[0] != 0:
            g.log.error("Bonnie test failed on server %s", servers[index])
            rt = False

    # Clean up bonnie's scratch files; a cleanup failure marks the run
    # as failed but we still try the remaining servers.
    for index, server in enumerate(servers):
        ret, _, _ = g.run(server, "rm -rf %s/Bonnie.*"
                          % directory_to_run[index])
        if ret != 0:
            g.log.error("Failed to remove files from %s", server)
            rt = False

    for server in servers:
        ret, _, _ = g.run(server, "yum -y remove bonnie++")
        if ret != 0:
            g.log.error("Failed to remove bonnie from %s", server)
            # NOTE(review): this discards the test outcome (rt) and skips
            # the remaining servers — kept as-is to preserve behavior,
            # but consider `rt = False; continue` instead.
            return False
    return rt