Exemple #1
0
    def test_download(self):
        """Testing SSH download() method

        Downloads a file from the primary host and verifies that the md5sum
        of the local copy matches the md5sum computed on the remote host.
        """
        # Single-argument print() behaves identically on Python 2 and 3.
        print("Running: %s - %s" % (self.id(), self.shortDescription()))

        remote_file = '/etc/hosts'
        local_file = '/tmp/download_test_file'

        # remove local test file (ignore error if not exist)
        g.run_local('rm -f %s' % local_file)

        # md5sum remote file
        command = 'md5sum %s| awk \'{print $1}\'' % remote_file
        rcode, rout, _ = g.run(self.primary_host, command)
        # Fail the test explicitly instead of hitting a NameError later on.
        self.assertEqual(rcode, 0, 'failed to md5sum the remote file')
        md5sum_up = rout.strip()
        # The pipeline's exit status is awk's, so also require real output.
        self.assertTrue(md5sum_up, 'empty md5sum for the remote file')

        # download it
        g.download(self.primary_host, remote_file, local_file)

        # md5sum local copy
        command = 'md5sum %s | awk \'{print $1}\'' % local_file
        rcode, rout, _ = g.run_local(command)
        self.assertEqual(rcode, 0, 'failed to md5sum the local copy')
        md5sum_down = rout.strip()
        self.assertTrue(md5sum_down, 'empty md5sum for the local copy')

        # compare the md5sums
        self.assertEqual(md5sum_down, md5sum_up, 'md5sums do not match')
Exemple #2
0
    def setUp(self):
        """
        Setup and mount volume, create a test user on the first client and
        enable key based ssh login for that user, or raise ExecutionError
        """
        self.get_super_method(self, 'setUp')()

        # Bring the volume up and mount it on the first mount object
        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
            raise ExecutionError("Failed to Setup and Mount Volume")

        client = self.clients[0]

        # Create the user "test_user1" on the first client
        if add_user(client, "test_user1") is not True:
            raise ExecutionError("Failed to add user")

        # Give "test_user1" a password
        if set_passwd(client, "test_user1", "red123") is not True:
            raise ExecutionError("Failed to set password")

        # Generate an ssh key on the local host. A non-zero exit is tolerated
        # when the output says the key already exists (the piped "n"
        # presumably declines the overwrite prompt).
        keygen_cmd = 'echo -e "n" | ssh-keygen -f ~/.ssh/id_rsa -q -N ""'
        keygen_rc, keygen_out, _ = g.run_local(keygen_cmd)
        if keygen_rc and "already exists" not in keygen_out:
            raise ExecutionError("Failed to generate ssh-key")
        g.log.info("Successfully generated ssh-key")

        # Push the public key to the test user's account on the client
        copy_id_cmd = (
            'sshpass -p "red123" ssh-copy-id -o StrictHostKeyChecking=no'
            ' test_user1@{}'.format(client))
        copy_id_rc, _, _ = g.run_local(copy_id_cmd)
        if copy_id_rc:
            raise ExecutionError("Failed to perform ssh-copy-id")
        g.log.info("Successfully performed ssh-copy-id")
Exemple #3
0
    def test_download(self):
        """Testing SSH download() method

        Downloads a file from the primary host and verifies that the md5sum
        of the local copy matches the md5sum computed on the remote host.
        """
        # Single-argument print() behaves identically on Python 2 and 3.
        print("Running: %s - %s" % (self.id(), self.shortDescription()))

        remote_file = '/etc/hosts'
        local_file = '/tmp/download_test_file'

        # remove local test file (ignore error if not exist)
        g.run_local('rm -f %s' % local_file)

        # md5sum remote file
        command = 'md5sum %s| awk \'{print $1}\'' % remote_file
        rcode, rout, _ = g.run(self.primary_host, command)
        # Fail the test explicitly instead of hitting a NameError later on.
        self.assertEqual(rcode, 0, 'failed to md5sum the remote file')
        md5sum_up = rout.strip()
        # The pipeline's exit status is awk's, so also require real output.
        self.assertTrue(md5sum_up, 'empty md5sum for the remote file')

        # download it
        g.download(self.primary_host, remote_file, local_file)

        # md5sum local copy
        command = 'md5sum %s | awk \'{print $1}\'' % local_file
        rcode, rout, _ = g.run_local(command)
        self.assertEqual(rcode, 0, 'failed to md5sum the local copy')
        md5sum_down = rout.strip()
        self.assertTrue(md5sum_down, 'empty md5sum for the local copy')

        # compare the md5sums
        self.assertEqual(md5sum_down, md5sum_up, 'md5sums do not match')
Exemple #4
0
def are_nodes_offline(nodes):
    """Ping each node once from the local host and report which are offline.

    Args:
        nodes (str|list): Node(s) to check whether offline or not. Anything
            that is not a list is treated as a single node.

    Returns:
        tuple : Tuple containing two elements (ret, node_results).
        The first element ret is of type 'bool', True if all nodes
        are offline (also True when no nodes were given). False otherwise.

        The second element 'node_results' is of type dictionary and maps
        each node to True when the ping command returned non-zero
        (offline) and to False when it returned zero (online).
    """

    node_list = nodes if isinstance(nodes, list) else [nodes]

    node_results = {}
    for host in node_list:
        rcode, _, _ = g.run_local("ping %s -c1" % host)
        # A non-zero exit status from ping is taken to mean "offline".
        is_offline = bool(rcode)
        if is_offline:
            g.log.info("%s is offline" % host)
        else:
            g.log.info("%s is online" % host)
        node_results[host] = is_offline

    return all(node_results.values()), node_results
Exemple #5
0
    def test_peer_probe_invalid_ip_nonexist_host_nonexist_ip(self):
        '''
        Probe three unusable targets and verify glusterd survives.

        Peer probe is attempted against a non existing host, an invalid
        ip and a non existing ip; every probe has to return non-zero.
        Afterwards glusterd must still be running on mnode and the core
        file check for the servers, based on the test start timestamp,
        has to pass.
        '''
        _, start_stamp, _ = g.run_local('date +%s')
        start_stamp = start_stamp.strip()
        g.log.info("Running Test : %s", self.id())

        # Targets that peer probe is expected to reject
        self.non_exist_ip = '256.256.256.256'
        self.invalid_ip = '10.11.a'
        self.non_exist_host = 'abc.lab.eng.blr.redhat.com'

        # (log before probe, probe target, assertion message, log after probe)
        probe_cases = (
            ("peer probe checking for non existing host",
             self.non_exist_host,
             "peer probe should fail for non existhost: %s",
             "peer probe failed for non existing host"),
            ("peer probe checking for invalid ip",
             self.invalid_ip,
             "peer probe shouldfail for invalid ip: %s",
             "peer probe failed for invalid_ip"),
            ("peer probe checking for non existing ip",
             self.non_exist_ip,
             "peer probe should fail for non exist ip :%s",
             "peer probe failed for non existing ip"),
        )
        for before_msg, target, failure_fmt, after_msg in probe_cases:
            g.log.info(before_msg)
            probe_rc, _, _ = peer_probe(self.mnode, target)
            self.assertNotEqual(probe_rc, 0, failure_fmt % target)
            g.log.info(after_msg)

        # glusterd has to be running on mnode after the failed probes
        self.mnode_list = [self.mnode]
        glusterd_state = is_glusterd_running(self.mnode_list)
        self.assertEqual(glusterd_state, 0,
                         "Glusterd service should be running")

        # The core file check must pass for all servers
        no_core = is_core_file_created(self.servers, start_stamp)
        self.assertTrue(no_core, "core file found")
Exemple #6
0
 def test_run_local(self):
     """Testing SSH run_local() method

     Echoes self.test_string through g.run_local() and checks the return
     code, stdout and stderr of the command.
     """
     # Single-argument print() behaves identically on Python 2 and 3,
     # whereas the bare print statement is a syntax error on Python 3.
     print("Running: %s - %s" % (self.id(), self.shortDescription()))
     rcode, rout, rerr = g.run_local('echo -n %s' % self.test_string)
     self.assertEqual(rcode, 0)
     self.assertEqual(rout, self.test_string)
     print(rout)
     self.assertEqual(rerr, '')
Exemple #7
0
 def test_run_local(self):
     """Testing SSH run_local() method

     Echoes self.test_string through g.run_local() and checks the return
     code, stdout and stderr of the command.
     """
     # Single-argument print() behaves identically on Python 2 and 3,
     # whereas the bare print statement is a syntax error on Python 3.
     print("Running: %s - %s" % (self.id(), self.shortDescription()))
     rcode, rout, rerr = g.run_local('echo -n %s' % self.test_string)
     self.assertEqual(rcode, 0)
     self.assertEqual(rout, self.test_string)
     print(rout)
     self.assertEqual(rerr, '')
    def test_concurrent_set(self):
        """Set options on two volumes at the same time and check for crashes.

        Creates "first-vol" and "second-vol", runs a loop of 100
        ``gluster volume set`` commands against each of them in parallel
        from randomly chosen servers, verifies both loops exit with 0 and
        finally runs the core file check against the test start timestamp.
        """
        # time stamp of current test case
        _, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Create both volumes. self.volname / self.brick_list end up
        # describing the last volume created, exactly as before.
        for volname in ("first-vol", "second-vol"):
            self.volname = volname
            self.brick_list = form_bricks_list(self.mnode, self.volname, 3,
                                               self.servers,
                                               self.all_servers_info)
            g.log.info("Creating a volume")
            ret = volume_create(self.mnode,
                                self.volname,
                                self.brick_list,
                                force=False)
            self.assertEqual(ret[0], 0,
                             "Unable to create volume %s" % self.volname)
            g.log.info("Volume created successfuly %s", self.volname)

        cmd1 = ("for i in `seq 1 100`; do gluster volume set first-vol "
                "read-ahead on; done")
        cmd2 = ("for i in `seq 1 100`; do gluster volume set second-vol "
                "write-behind on; done")

        # Start both loops before waiting on either so they overlap
        proc1 = g.run_async(random.choice(self.servers), cmd1)
        proc2 = g.run_async(random.choice(self.servers), cmd2)

        ret1, _, _ = proc1.async_communicate()
        ret2, _, _ = proc2.async_communicate()

        self.assertEqual(
            ret1, 0, "Concurrent volume set on different volumes "
            "simultaneously failed")
        self.assertEqual(
            ret2, 0, "Concurrent volume set on different volumes "
            "simultaneously failed")

        g.log.info("Setting options on different volumes @ same time "
                   "successfully completed")

        # A truthy result from the core file check means no core was found
        ret = is_core_file_created(self.servers, test_timestamp)
        if not ret:
            g.log.error("core file found in directory, it "
                        "indicates the glusterd service crash")
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service "
                   "running successfully")
def add_node_to_nfs_ganesha_cluster(servers, node_to_add, vip):
    """Adds a node to nfs ganesha cluster using gdeploy

    Renders the gdeploy template into a conf file under /tmp, runs
    ``gdeploy -c`` on it from the local host and then runs ``pcs status``
    on the first server. The rendered conf file is removed again whether
    or not the operation succeeded.

    Args:
        servers (list): Nodes of existing nfs-ganesha cluster.
        node_to_add (str): Node to add in existing nfs-ganesha cluster.
        vip (str): virtual IP of the node mentioned in 'node_to_add'
            param.

    Returns:
        bool : True on successfully adding node to nfs-ganesha cluster.
            False otherwise

    Example:
        add_node_to_nfs_ganesha_cluster(servers, node_to_add, vip)
    """

    conf_file = "add_node_to_nfs_ganesha_cluster.jinja"
    gdeploy_config_file = GDEPLOY_CONF_DIR + conf_file
    tmp_gdeploy_config_file = ("/tmp/" + os.path.splitext(conf_file)[0] +
                               ".conf")

    values_to_substitute_in_template = {
        # 'servers' covers the existing cluster plus the new node
        'servers': servers + [node_to_add],
        'node_to_add': node_to_add,
        'cluster_nodes': servers,
        'vip': vip
    }

    try:
        ret = g.render_template(gdeploy_config_file,
                                values_to_substitute_in_template,
                                tmp_gdeploy_config_file)
        if not ret:
            g.log.error("Failed to substitute values in %s file",
                        tmp_gdeploy_config_file)
            return False

        cmd = "gdeploy -c " + tmp_gdeploy_config_file
        retcode, stdout, stderr = g.run_local(cmd)
        if retcode != 0:
            g.log.error("Failed to execute gdeploy cmd %s for adding node "
                        "in existing nfs ganesha cluster", cmd)
            g.log.error("gdeploy console output for adding node in "
                        "existing nfs-ganesha cluster: %s", stderr)
            return False

        g.log.info("gdeploy output for adding node in existing "
                   "nfs-ganesha cluster: %s", stdout)

        # pcs status output
        _, _, _ = g.run(servers[0], "pcs status")
        return True
    finally:
        # Removing gdeploy conf file from /tmp, on failure paths as well
        if os.path.exists(tmp_gdeploy_config_file):
            os.remove(tmp_gdeploy_config_file)
def teardown_nfs_ganesha_cluster(servers, force=False):
    """Teardown nfs ganesha cluster using gdeploy

    Renders the gdeploy template into a conf file under /tmp and runs
    ``gdeploy -c`` on it from the local host. The rendered conf file is
    removed again whether or not the operation succeeded.

    Args:
        servers (list): Nodes in nfs-ganesha cluster to teardown entire
            cluster
        force (bool): if this option is set to True, then after a
            successful gdeploy run the ganesha-ha.sh teardown script is
            run and the nfs-ganesha service is stopped on every server.

    Returns:
        bool : True on successfully teardown nfs-ganesha cluster.
            False otherwise

    Example:
        teardown_nfs_ganesha_cluster(servers)
    """

    conf_file = "teardown_nfs_ganesha_cluster.jinja"
    gdeploy_config_file = GDEPLOY_CONF_DIR + conf_file
    tmp_gdeploy_config_file = ("/tmp/" + os.path.splitext(conf_file)[0] +
                               ".conf")

    values_to_substitute_in_template = {'servers': servers}

    try:
        ret = g.render_template(gdeploy_config_file,
                                values_to_substitute_in_template,
                                tmp_gdeploy_config_file)
        if not ret:
            g.log.error("Failed to substitute values in %s file",
                        tmp_gdeploy_config_file)
            return False

        cmd = "gdeploy -c " + tmp_gdeploy_config_file
        retcode, stdout, stderr = g.run_local(cmd)
        if retcode != 0:
            g.log.error("Failed to execute gdeploy cmd %s for teardown nfs "
                        "ganesha cluster", cmd)
            g.log.error("gdeploy console output for teardown nfs-ganesha "
                        "cluster: %s", stderr)
            return False

        g.log.info("gdeploy output for teardown nfs-ganesha cluster: %s",
                   stdout)
    finally:
        # Removing gdeploy conf file from /tmp, on failure paths as well
        if os.path.exists(tmp_gdeploy_config_file):
            os.remove(tmp_gdeploy_config_file)

    if force:
        g.log.info("Executing force cleanup...")
        for server in servers:
            cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --teardown "
                   "/var/run/gluster/shared_storage/nfs-ganesha")
            # Best effort: results of the cleanup commands are not checked
            _, _, _ = g.run(server, cmd)
            _, _, _ = stop_nfs_ganesha_service(server)

    return True
def update_volume_export_configuration(mnode, volname, config_to_update):
    """Updates volume export configuration and runs
       refresh config for the volume.

    Renders the gdeploy template into a conf file under /tmp and runs
    ``gdeploy -c`` on it from the local host. The rendered conf file is
    removed again whether or not the operation succeeded.

    Args:
        mnode (str): Node in which refresh config command will
            be executed.
        volname (str): volume name
        config_to_update (str): config lines to update in volume
            export configuration file.

    Returns:
        bool : True on successfully updating export config for
            nfs-ganesha volume. False otherwise

    Example:
        update_volume_export_configuration(mnode, volname, config_to_update)
    """

    conf_file = "nfs_ganesha_update_export_file.jinja"
    gdeploy_config_file = GDEPLOY_CONF_DIR + conf_file
    tmp_gdeploy_config_file = ("/tmp/" + os.path.splitext(conf_file)[0] +
                               ".conf")

    values_to_substitute_in_template = {
        'server': mnode,
        'volname': volname,
        'config_to_update': config_to_update
    }

    try:
        ret = g.render_template(gdeploy_config_file,
                                values_to_substitute_in_template,
                                tmp_gdeploy_config_file)
        if not ret:
            g.log.error("Failed to substitute values in %s file",
                        tmp_gdeploy_config_file)
            return False

        cmd = "gdeploy -c " + tmp_gdeploy_config_file
        retcode, stdout, stderr = g.run_local(cmd)
        if retcode != 0:
            g.log.error("Failed to execute gdeploy cmd %s to update export "
                        "configuration on nfs ganesha volume", cmd)
            g.log.error("gdeploy console output to update export "
                        "configuration on nfs ganesha volume: %s", stderr)
            return False

        g.log.info("gdeploy output to update export configuration "
                   "on nfs ganesha volume: %s", stdout)
        return True
    finally:
        # Removing the gdeploy conf file from /tmp, on failure paths as well
        if os.path.exists(tmp_gdeploy_config_file):
            os.remove(tmp_gdeploy_config_file)
def delete_node_from_nfs_ganesha_cluster(servers, node_to_delete):
    """Deletes a node from existing nfs ganesha cluster using gdeploy

    Renders the gdeploy template into a conf file under /tmp, runs
    ``gdeploy -c`` on it from the local host and then runs ``pcs status``
    on the first server. The rendered conf file is removed again whether
    or not the operation succeeded.

    Args:
        servers (list): Nodes of existing nfs-ganesha cluster.
        node_to_delete (str): Node to delete from existing nfs-ganesha cluster.

    Returns:
        bool : True on successfully deleting the node from the
            nfs-ganesha cluster. False otherwise

    Example:
        delete_node_from_nfs_ganesha_cluster(servers, node_to_delete)
    """

    conf_file = "delete_node_from_nfs_ganesha_cluster.jinja"
    gdeploy_config_file = GDEPLOY_CONF_DIR + conf_file
    tmp_gdeploy_config_file = ("/tmp/" + os.path.splitext(conf_file)[0] +
                               ".conf")

    values_to_substitute_in_template = {
        'servers': servers,
        'node_to_delete': node_to_delete
    }

    try:
        ret = g.render_template(gdeploy_config_file,
                                values_to_substitute_in_template,
                                tmp_gdeploy_config_file)
        if not ret:
            g.log.error("Failed to substitute values in %s file",
                        tmp_gdeploy_config_file)
            return False

        cmd = "gdeploy -c " + tmp_gdeploy_config_file
        retcode, stdout, stderr = g.run_local(cmd)
        if retcode != 0:
            g.log.error("Failed to execute gdeploy cmd %s for deleting node "
                        "from existing nfs ganesha cluster", cmd)
            g.log.error("gdeploy console output for deleting node from "
                        "existing nfs-ganesha cluster: %s", stderr)
            return False

        g.log.info("gdeploy output for deleting node from existing "
                   "nfs-ganesha cluster: %s", stdout)

        # pcs status output
        _, _, _ = g.run(servers[0], "pcs status")
        return True
    finally:
        # Removing gdeploy conf file from /tmp, on failure paths as well
        if os.path.exists(tmp_gdeploy_config_file):
            os.remove(tmp_gdeploy_config_file)
Exemple #13
0
 def test_upload(self):
     """Testing SSH upload() method

     Uploads /etc/hosts to the primary host and verifies that the md5sum
     of the uploaded copy matches the md5sum of the local file.
     """
     # Single-argument print() behaves identically on Python 2 and 3.
     print("Running: %s - %s" % (self.id(), self.shortDescription()))
     g.run(self.primary_host, 'rm -f /tmp/upload_test_file')
     rcode, rout, _ = g.run_local('md5sum /etc/hosts | awk \'{print $1}\'')
     # Fail the test explicitly instead of hitting a NameError later on.
     self.assertEqual(rcode, 0, 'failed to md5sum the local file')
     md5sum = rout.strip()
     # The pipeline's exit status is awk's, so also require real output.
     self.assertTrue(md5sum, 'empty md5sum for the local file')
     g.upload(self.primary_host, '/etc/hosts', '/tmp/upload_test_file')
     command = 'md5sum /tmp/upload_test_file | awk \'{print $1}\''
     rcode, rout, _ = g.run(self.primary_host, command)
     self.assertEqual(rcode, 0, 'failed to md5sum the uploaded file')
     md5sum_up = rout.strip()
     self.assertTrue(md5sum_up, 'empty md5sum for the uploaded file')
     self.assertEqual(md5sum, md5sum_up, 'md5sums do not match')
    def test_enable_brickmux_create_and_stop_three_volumes(self):
        """
        Test Case:
        1.Set cluster.brick-multiplex to enabled.
        2.Create and start three volumes named test_volume_1..3.
        3.Check that each server hosting a brick of the last volume
          runs exactly one brick process.
        4.Stop three volumes one by one.
        5.Run the core file check against the test start timestamp.
        """

        # Remember when the test started for the core file check
        _, start_stamp, _ = g.run_local('date +%s')
        start_stamp = start_stamp.strip()

        # Turn brick multiplexing on cluster wide
        brickmux_option = {'cluster.brick-multiplex': 'enable'}
        option_set = set_volume_options(self.mnode, 'all', brickmux_option)
        self.assertTrue(option_set,
                        "Failed to set brick-multiplex to enable.")
        g.log.info("Successfully set brick-multiplex to enable.")

        volume_names = ["test_volume_%s" % number for number in range(1, 4)]

        # Create and start the three volumes
        for name in volume_names:
            self.volume['name'] = name
            self.volname = name
            created = setup_volume(self.mnode, self.all_servers_info,
                                   self.volume)
            self.assertTrue(created,
                            "Failed to create and start %s" % self.volname)
            g.log.info("Successfully created and started volume %s.",
                       self.volname)

        # Brick process count, using the bricks of the volume created last
        for brick in get_all_bricks(self.mnode, self.volname):
            server = brick.partition(":")[0]
            process_count = get_brick_processes_count(server)
            self.assertEqual(
                process_count, 1,
                "ERROR: More than one brick process on %s." % server)
            g.log.info("Only one brick process present on %s", server)

        # Stop the volumes in creation order
        for name in volume_names:
            self.volume['name'] = name
            self.volname = name
            stop_rc, _, _ = volume_stop(self.mnode, self.volname)
            self.assertEqual(stop_rc, 0,
                             "Failed to stop the volume %s" % self.volname)
            g.log.info("Volume %s stopped successfully", self.volname)

        # A truthy result from the core file check means no core was found
        no_core = is_core_file_created(self.servers, start_stamp)
        self.assertTrue(no_core, "Core file found.")
        g.log.info("No core files found, glusterd service running "
                   "successfully")
Exemple #15
0
 def test_upload(self):
     """Testing SSH upload() method

     Uploads /etc/hosts to the primary host and verifies that the md5sum
     of the uploaded copy matches the md5sum of the local file.
     """
     # Single-argument print() behaves identically on Python 2 and 3.
     print("Running: %s - %s" % (self.id(), self.shortDescription()))
     g.run(self.primary_host, 'rm -f /tmp/upload_test_file')
     rcode, rout, _ = g.run_local('md5sum /etc/hosts | awk \'{print $1}\'')
     # Fail the test explicitly instead of hitting a NameError later on.
     self.assertEqual(rcode, 0, 'failed to md5sum the local file')
     md5sum = rout.strip()
     # The pipeline's exit status is awk's, so also require real output.
     self.assertTrue(md5sum, 'empty md5sum for the local file')
     g.upload(self.primary_host,
              '/etc/hosts', '/tmp/upload_test_file')
     command = 'md5sum /tmp/upload_test_file | awk \'{print $1}\''
     rcode, rout, _ = g.run(self.primary_host, command)
     self.assertEqual(rcode, 0, 'failed to md5sum the uploaded file')
     md5sum_up = rout.strip()
     self.assertTrue(md5sum_up, 'empty md5sum for the uploaded file')
     self.assertEqual(md5sum, md5sum_up, 'md5sums do not match')
def create_nfs_ganesha_cluster(servers, vips):
    """Creates nfs ganesha cluster using gdeploy

    Renders the gdeploy template into a conf file under /tmp, runs
    ``gdeploy -c`` on it from the local host and then runs ``pcs status``
    on the first server. The rendered conf file is removed again whether
    or not the operation succeeded.

    Args:
        servers (list): Nodes in which nfs-ganesha cluster will be created.
        vips (list): virtual IPs of each servers mentioned in 'servers'
            param.

    Returns:
        bool : True on successfully creating nfs-ganesha cluster.
            False otherwise

    Example:
        create_nfs_ganesha_cluster(servers, vips)
    """

    conf_file = "create_nfs_ganesha_cluster.jinja"
    gdeploy_config_file = GDEPLOY_CONF_DIR + conf_file
    tmp_gdeploy_config_file = ("/tmp/" + os.path.splitext(conf_file)[0] +
                               ".conf")

    values_to_substitute_in_template = {'servers': servers, 'vips': vips}

    try:
        ret = g.render_template(gdeploy_config_file,
                                values_to_substitute_in_template,
                                tmp_gdeploy_config_file)
        if not ret:
            g.log.error("Failed to substitute values in %s file",
                        tmp_gdeploy_config_file)
            return False

        cmd = "gdeploy -c " + tmp_gdeploy_config_file
        retcode, stdout, stderr = g.run_local(cmd)
        if retcode != 0:
            g.log.error("Failed to execute gdeploy cmd %s for creating nfs "
                        "ganesha cluster", cmd)
            g.log.error("gdeploy console output for creating nfs-ganesha "
                        "cluster: %s", stderr)
            return False

        g.log.info("gdeploy output for creating nfs-ganesha cluster: %s",
                   stdout)

        # pcs status output
        _, _, _ = g.run(servers[0], "pcs status")
        return True
    finally:
        # Removing the gdeploy conf file from /tmp, on failure paths as well
        if os.path.exists(tmp_gdeploy_config_file):
            os.remove(tmp_gdeploy_config_file)
def run_refresh_config(mnode, volname):
    """Runs refresh config on nfs ganesha volume.

    Renders the gdeploy template into a conf file under /tmp and runs
    ``gdeploy -c`` on it from the local host. The rendered conf file is
    removed again whether or not the operation succeeded.

    Args:
        mnode (str): Node in which refresh config command will
            be executed.
        volname (str): volume name

    Returns:
        bool : True on successfully running refresh config on
            nfs-ganesha volume. False otherwise

    Example:
        run_refresh_config("abc.com", volname)
    """

    conf_file = "nfs_ganesha_refresh_config.jinja"
    gdeploy_config_file = GDEPLOY_CONF_DIR + conf_file

    tmp_gdeploy_config_file = ("/tmp/" + os.path.splitext(conf_file)[0] +
                               ".conf")

    values_to_substitute_in_template = {'server': mnode,
                                        'volname': volname}

    try:
        ret = g.render_template(gdeploy_config_file,
                                values_to_substitute_in_template,
                                tmp_gdeploy_config_file)
        if not ret:
            g.log.error("Failed to substitute values in %s file",
                        tmp_gdeploy_config_file)
            return False

        cmd = "gdeploy -c " + tmp_gdeploy_config_file
        retcode, stdout, stderr = g.run_local(cmd)
        if retcode != 0:
            g.log.error("Failed to execute gdeploy cmd %s for running "
                        "refresh config on nfs ganesha volume", cmd)
            g.log.error("gdeploy console output for running refresh config "
                        "on nfs ganesha volume: %s", stderr)
            return False

        g.log.info("gdeploy output for running refresh config "
                   "on nfs ganesha volume: %s", stdout)
        return True
    finally:
        # Removing the gdeploy conf file from /tmp, on failure paths as well
        if os.path.exists(tmp_gdeploy_config_file):
            os.remove(tmp_gdeploy_config_file)
def set_nfs_ganesha_client_configuration(client_nodes):
    """Sets pre-requisites in the client machines to
       mount with nfs-ganesha.

    Renders the gdeploy template into a conf file under /tmp and runs
    ``gdeploy -c`` on it from the local host. The rendered conf file is
    removed again whether or not the operation succeeded.

    Args:
        client_nodes (list): Client nodes in which the prerequisite
            are done to do nfs-ganesha mount.

    Returns:
        bool : True on successfully setting the nfs-ganesha client
            configuration. False otherwise

    Example:
        set_nfs_ganesha_client_configuration(client_nodes)
    """

    conf_file = "nfs_ganesha_client_configuration.jinja"
    gdeploy_config_file = GDEPLOY_CONF_DIR + conf_file
    tmp_gdeploy_config_file = ("/tmp/" + os.path.splitext(conf_file)[0] +
                               ".conf")

    values_to_substitute_in_template = {'servers': client_nodes}

    try:
        ret = g.render_template(gdeploy_config_file,
                                values_to_substitute_in_template,
                                tmp_gdeploy_config_file)
        if not ret:
            g.log.error("Failed to substitute values in %s file",
                        tmp_gdeploy_config_file)
            return False

        cmd = "gdeploy -c " + tmp_gdeploy_config_file
        retcode, stdout, stderr = g.run_local(cmd)
        if retcode != 0:
            g.log.error("Failed to execute gdeploy cmd %s for setting nfs "
                        "ganesha client configuration", cmd)
            g.log.error("gdeploy console output for setting nfs-ganesha "
                        "client configuration: %s", stderr)
            return False

        g.log.info("gdeploy output for setting nfs-ganesha client "
                   "configuration: %s", stdout)
        return True
    finally:
        # Removing the gdeploy conf file from /tmp, on failure paths as well
        if os.path.exists(tmp_gdeploy_config_file):
            os.remove(tmp_gdeploy_config_file)
Exemple #19
0
    def test_verify_peer_probe_with_firewall_ports_not_opened(self):
        """
        Test Steps:
        1. Open glusterd port only in  Node1 using firewall-cmd command
        2. Perform peer probe to Node2 from Node 1
        3. Verify glusterd.log for Errors
        4. Check for core files created
        """

        # Start time of the test, used by the core file check at the end
        _, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Remove firewall service on the node to probe to
        self._remove_firewall_service()

        # Try peer probe from mnode to node
        ret, _, err = peer_probe(self.mnode, self.node_to_probe)
        self.assertEqual(ret, 1, ("Unexpected behavior: Peer probe should"
                                  " fail when the firewall services are "
                                  "down but returned success"))

        expected_err = ('peer probe: failed: Probe returned with '
                        'Transport endpoint is not connected\n')
        self.assertEqual(
            err, expected_err,
            "Expected error {}, but returned {}".format(expected_err, err))
        # The probe targets node_to_probe and is issued from mnode, so the
        # placeholders must be filled in that order.
        msg = ("Peer probe of {} from {} failed as expected ".format(
            self.node_to_probe, self.mnode))
        g.log.info(msg)

        # Verify there are no glusterd crashes: any line containing ' E '
        # in the test specific glusterd log of mnode fails the test.
        status = True
        glusterd_logs = (self._get_test_specific_glusterd_log(
            self.mnode).split("\n"))
        for line in glusterd_logs:
            if ' E ' in line:
                status = False
                g.log.info("Error found: ' %s '", line)

        self.assertTrue(status, "Glusterd crash found")

        # Verify no core files are created (truthy result is asserted)
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "Unexpected crash found.")
        g.log.info("No core file found as expected")
    def test_logs_while_peer_detach(self):
        '''
        -> Back up glusterd.log on mnode under a timestamped name
        -> Empty glusterd.log
        -> Detach a randomly chosen server (never the first one)
        -> Count the lines containing ' E ' in glusterd.log
        -> The count has to be zero
        '''

        # Timestamp used as suffix of the backup log file
        _, stamp, _ = g.run_local('date +%s')
        stamp = stamp.strip()

        # Keep a copy of the current glusterd log
        backup_cmd = ('cp /var/log/glusterfs/glusterd.log '
                      '/var/log/glusterfs/glusterd_%s.log' % stamp)
        backup_rc, _, _ = g.run(self.mnode, backup_cmd)
        if backup_rc:
            raise ExecutionError("Failed to copy glusterd logs")

        # Start from an empty glusterd log
        truncate_cmd = 'echo > /var/log/glusterfs/glusterd.log'
        truncate_rc, _, _ = g.run(self.mnode, truncate_cmd)
        if truncate_rc:
            raise ExecutionError("Failed to clear glusterd.log file on %s"
                                 % self.mnode)

        # Detach one of the servers other than the first
        self.random_server = random.choice(self.servers[1:])
        detached = peer_detach_servers(self.mnode, self.random_server)
        self.assertTrue(detached,
                        "Failed to detach peer %s" % self.random_server)
        g.log.info("Peer detach successful for %s", self.random_server)

        # Count the error lines logged since the log was emptied
        count_cmd = "grep ' E ' /var/log/glusterfs/glusterd.log | wc -l"
        count_rc, error_count, _ = g.run(self.mnode, count_cmd)
        self.assertEqual(count_rc, 0,
                         "Failed to get error message count in "
                         "glusterd log file")
        g.log.info("Successful getting error message count in log file")

        self.assertEqual(int(error_count), 0,
                         "Found Error messages in glusterd log "
                         "file after peer detach")
        g.log.info("No error messages found in gluterd log file after peer "
                   "detach")
    def test_brick_log_messages(self):
        '''
        Check that deleting files from the mount point leaves no error
        (' E ') lines in any brick log.

        -> Create volume
        -> Mount volume
        -> write files on mount point
        -> delete files from mount point
        -> check for any errors filled in all brick logs

        Every brick log is first copied to <name>_<timestamp>.log and then
        emptied, so only lines written during the delete are inspected.

        Raises ExecutionError when a brick log cannot be copied or cleared.
        '''

        # checking volume mounted or not
        for mount_obj in self.mounts:
            ret = is_mounted(self.volname, mount_obj.mountpoint, self.mnode,
                             mount_obj.client_system, self.mount_type)
            self.assertTrue(ret, "Not mounted on %s" % mount_obj.client_system)
            g.log.info("Mounted on %s", mount_obj.client_system)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 2 "
                "--dir-length 5 "
                "--max-num-of-dirs 3 "
                "--num-of-files 10 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            # Each mount gets its own directory number range
            self.counter = self.counter + 10

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Getting timestamp, used as suffix of the backup log files
        _, timestamp, _ = g.run_local('date +%s')
        timestamp = timestamp.strip()

        # Getting all bricks
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get brick list")
        g.log.info("Successful in getting brick list %s", brick_list)

        # One (node, log name) pair per brick. A plain node -> log mapping
        # would keep only the last brick of a node hosting several bricks.
        # The log name is the brick path with the leading '/' dropped and
        # the remaining '/' replaced by '-'.
        brick_logs = []
        for brick in brick_list:
            node, brick_path = brick.split(':', 1)
            brick_logs.append((node, '-'.join(brick_path.split('/')[1:])))

        for node, log_name in brick_logs:
            #  Copying brick logs into other file for backup purpose
            ret, _, _ = g.run(
                node, 'cp /var/log/glusterfs/bricks/%s.log '
                '/var/log/glusterfs/bricks/%s_%s.log' %
                (log_name, log_name, timestamp))
            if ret:
                raise ExecutionError("Failed to copy brick logs of %s" % node)
            g.log.info("Brick logs copied successfully on node %s", node)

            # Clearing the existing brick log file
            ret, _, _ = g.run(
                node, 'echo > /var/log/glusterfs/bricks/%s.log' % log_name)
            if ret:
                raise ExecutionError("Failed to clear brick log file on %s" %
                                     node)
            g.log.info("Successfully cleared the brick log files on node %s",
                       node)

        # Deleting files from mount point
        ret, _, _ = g.run(self.mounts[0].client_system,
                          'rm -rf %s/*' % self.mounts[0].mountpoint)
        self.assertEqual(
            ret, 0, "Failed to delete files from mountpoint %s" %
            self.mounts[0].mountpoint)
        g.log.info("Files deleted successfully from mountpoint %s",
                   self.mounts[0].mountpoint)

        # Searching for error messages in brick logs after deleting
        # files from mountpoint
        for node, log_name in brick_logs:
            ret, out, _ = g.run(
                node, "grep ' E ' /var/log/glusterfs/bricks/%s.log | wc -l" %
                log_name)
            # Without this check a failed command would surface as a
            # ValueError from int('') instead of a test failure
            self.assertEqual(ret, 0, "Failed to search brick log on %s" % node)
            self.assertEqual(int(out), 0, "Found Error messages in brick "
                             "log %s" % node)
            g.log.info("No error messages found in brick log %s", node)
    def test_volume_get(self):
        """
        Exercise "gluster volume get" with invalid and valid arguments.

        Flow:
        1. Create a gluster cluster
        2. Ask for io-cache on a volume that does not exist
        3. Ask for all options on a volume that does not exist
        4. Use incomplete command syntax:
            gluster volume get <vol-name>
            gluster volume get
            gluster volume get io-cache
        5. Create any type of volume in the cluster
        6. Ask for an option that does not exist
            gluster volume get <vol-name> temp.key
        7. Get all options of the volume
            gluster volume get <vol-name> all
        8. Get one specific option of the volume
            gluster volume get <vol-name> io-cache
        9. Set an option on the volume
            gluster volume set <vol-name> performance.low-prio-threads 14
        10. Get all options and check the low-prio-threads value
        11. Get all options of the volume once more
        12. Check for core files
        """
        # pylint: disable=too-many-statements

        # Start time of the test, handed to the core file check at the end
        _, started_at, _ = g.run_local('date +%s')
        started_at = started_at.strip()

        self.non_exist_volume = "abc99"
        missing_vol = self.non_exist_volume

        # "<missing volume> io-cache" must be rejected
        rcode, _, stderr = g.run(
            self.mnode, "gluster volume get %s io-cache" % missing_vol)
        self.assertNotEqual(
            rcode, 0,
            "gluster volume get command should fail for non existing "
            "volume with io-cache option :%s" % missing_vol)
        expected = 'Volume ' + missing_vol + ' does not exist'
        self.assertIn(
            expected, stderr,
            "No proper error message for non existing volume with "
            "io-cache option :%s" % missing_vol)
        g.log.info("gluster volume get command failed successfully for "
                   "non existing volume with io-cache option:%s",
                   missing_vol)

        # "<missing volume> all" must be rejected with the same error
        rcode, _, stderr = g.run(
            self.mnode, "gluster volume get %s all" % missing_vol)
        self.assertNotEqual(
            rcode, 0,
            "gluster volume get command should fail for non existing "
            "volume %s with all option" % missing_vol)
        self.assertIn(
            expected, stderr,
            "No proper error message for non existing volume with all "
            "option:%s" % missing_vol)
        g.log.info("gluster volume get command failed successfully for "
                   "non existing volume with all option :%s", missing_vol)

        # Volume name without a key: the usage text is expected
        rcode, _, stderr = g.run(
            self.mnode, "gluster volume get %s" % missing_vol)
        self.assertNotEqual(
            rcode, 0,
            "gluster volume get command should fail for non existing "
            "volume :%s" % missing_vol)
        expected = 'get <VOLNAME|all> <key|all>'
        self.assertIn(
            expected, stderr,
            "No proper error message for non existing volume :%s"
            % missing_vol)
        g.log.info("gluster volume get command failed successfully for "
                   "non existing volume :%s", missing_vol)

        # Neither volume name nor key
        rcode, _, stderr = g.run(self.mnode, "gluster volume get")
        self.assertNotEqual(rcode, 0,
                            "gluster volume get command should fail")
        self.assertIn(
            expected, stderr,
            "No proper error message for gluster volume get command")
        g.log.info("gluster volume get command failed successfully")

        # Key only, no volume name
        rcode, _, stderr = g.run(self.mnode, "gluster volume get io-cache")
        self.assertNotEqual(
            rcode, 0, "gluster volume get io-cache command should fail")
        self.assertIn(
            expected, stderr,
            "No proper error message for gluster volume get io-cache "
            "command")
        g.log.info("gluster volume get io-cache command failed successfully")

        # Existing volume, option that does not exist
        rcode, _, stderr = g.run(
            self.mnode, "gluster volume get %s temp.key" % self.volname)
        self.assertNotEqual(
            rcode, 0,
            "gluster volume get command should fail for existing volume "
            "%s with non-existing option" % self.volname)
        expected = 'Did you mean auth.allow or ...reject?'
        self.assertIn(
            expected, stderr,
            "No proper error message for existing volume %s with "
            "non-existing option" % self.volname)
        g.log.info("gluster volume get command failed successfully for "
                   "existing volume %s with non existing option",
                   self.volname)

        # All options of the existing volume
        options = get_volume_options(self.mnode, self.volname, "all")
        self.assertIsNotNone(
            options,
            "gluster volume get %s all command failed" % self.volname)
        g.log.info("gluster volume get %s all command executed successfully",
                   self.volname)

        # Single option: io-cache
        options = get_volume_options(self.mnode, self.volname, "io-cache")
        self.assertIsNotNone(
            options,
            "gluster volume get %s io-cache command failed" % self.volname)
        self.assertIn("on", options['performance.io-cache'],
                      "io-cache value is not correct")
        g.log.info("io-cache value is correct")

        # Set performance.low-prio-threads on the volume
        new_option = {'performance.low-prio-threads': '14'}
        is_set = set_volume_options(self.mnode, self.volname, new_option)
        self.assertTrue(
            is_set,
            "gluster volume set %s performance.low-prio-threads failed"
            % self.volname)
        g.log.info("gluster volume set %s performance.low-prio-threads "
                   "executed successfully", self.volname)

        # The new value must show up when all options are fetched
        options = get_volume_options(self.mnode, self.volname, "all")
        self.assertIsNotNone(
            options, "gluster volume get %s all failed" % self.volname)
        self.assertIn("14", options['performance.low-prio-threads'],
                      "performance.low-prio-threads value is not correct")
        g.log.info("performance.low-prio-threads value is correct")

        # Fetch all options of the volume once more
        options = get_volume_options(self.mnode, self.volname, "all")
        self.assertIsNotNone(
            options,
            "gluster volume get %s all command failed" % self.volname)
        g.log.info("gluster volume get %s all command executed successfully",
                   self.volname)

        # Core file check on all servers
        no_core = is_core_file_created(self.servers, started_at)
        self.assertTrue(no_core, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")
    @classmethod
    def setUpClass(cls):
        """Set up the nfs-ganesha cluster used by the tests.

        Steps:
        - verify gdeploy is available on the management node
        - run GlusterBaseClass.setUpClass
        - pick the first num_of_nfs_ganesha_nodes servers and vips
        - reuse a healthy cluster; tear down an unhealthy one and create a
          new cluster; or create one when none exists
        - apply the nfs-ganesha client configuration
        - add client entries to /etc/hosts on servers and server entries
          to /etc/hosts on clients (failures here are only logged)

        Raises:
            ConfigError: gdeploy is missing or nfs ganesha is not enabled
                in the config.
            ExecutionError: connecting to the node, locating the HA config,
                teardown, creation, the final health check or the client
                configuration fails.
        """

        # Check if gdeploy is installed on glusto-tests management node.
        ret, _, _ = g.run_local("gdeploy --version")
        if ret != 0:
            raise ConfigError("Please install gdeploy to run the scripts")

        # __func__ exists on Python 2.6+ and Python 3, im_func only on 2
        GlusterBaseClass.setUpClass.__func__(cls)

        # Check if enable_nfs_ganesha is set in config file
        if not cls.enable_nfs_ganesha:
            raise ConfigError("Please enable nfs ganesha in config")

        # Read num_of_nfs_ganesha_nodes from config file and create
        # nfs ganesha cluster accordingly
        cls.num_of_nfs_ganesha_nodes = int(cls.num_of_nfs_ganesha_nodes)
        cls.servers_in_nfs_ganesha_cluster = (
            cls.servers[:cls.num_of_nfs_ganesha_nodes])
        cls.vips_in_nfs_ganesha_cluster = (
            cls.vips[:cls.num_of_nfs_ganesha_nodes])
        first_node = cls.servers_in_nfs_ganesha_cluster[0]

        # Create nfs ganesha cluster if not exists already
        create_cluster = True
        if is_nfs_ganesha_cluster_exists(first_node):
            if is_nfs_ganesha_cluster_in_healthy_state(first_node):
                g.log.info("Nfs-ganesha Cluster exists and is in healthy "
                           "state. Skipping cluster creation...")
                create_cluster = False
            else:
                g.log.info("Nfs-ganesha Cluster exists and is not in "
                           "healthy state.")
                g.log.info("Tearing down existing cluster which is not in "
                           "healthy state")
                ganesha_ha_file = ("/var/run/gluster/shared_storage/"
                                   "nfs-ganesha/ganesha-ha.conf")

                g.log.info("Collecting server details of existing "
                           "nfs ganesha cluster")
                # NOTE(review): the user was masked as "******"; "root" is
                # taken from the error message below - confirm.
                conn = g.rpyc_get_connection(first_node, user="root")
                if conn is None:
                    raise ExecutionError("Unable to get connection to 'root' "
                                         " of node %s " % first_node)
                try:
                    if not conn.modules.os.path.exists(ganesha_ha_file):
                        raise ExecutionError("Unable to locate %s" %
                                             ganesha_ha_file)
                    with conn.builtin.open(ganesha_ha_file, "r") as fh:
                        ganesha_ha_contents = fh.read()
                finally:
                    # Close the connection even when the file is missing
                    g.rpyc_close_connection(host=first_node, user="root")
                # The VIP_<server>=... lines name the existing members
                servers_in_existing_cluster = re.findall(
                    r'VIP_(.*)\=.*', ganesha_ha_contents)

                ret = teardown_nfs_ganesha_cluster(servers_in_existing_cluster,
                                                   force=True)
                if not ret:
                    raise ExecutionError("Failed to teardown nfs "
                                         "ganesha cluster")
                g.log.info("Existing cluster got teardown successfully")

        if create_cluster:
            g.log.info("Creating nfs-ganesha cluster of %s nodes",
                       cls.num_of_nfs_ganesha_nodes)
            g.log.info("Nfs-ganesha cluster node info: %s",
                       cls.servers_in_nfs_ganesha_cluster)
            g.log.info("Nfs-ganesha cluster vip info: %s",
                       cls.vips_in_nfs_ganesha_cluster)
            ret = create_nfs_ganesha_cluster(
                cls.servers_in_nfs_ganesha_cluster,
                cls.vips_in_nfs_ganesha_cluster)
            if not ret:
                raise ExecutionError("Failed to create nfs-ganesha cluster")

        if is_nfs_ganesha_cluster_in_healthy_state(first_node):
            g.log.info("Nfs-ganesha Cluster exists is in healthy state")
        else:
            raise ExecutionError("Nfs-ganesha Cluster setup Failed")

        ret = set_nfs_ganesha_client_configuration(cls.clients)
        if not ret:
            raise ExecutionError("Failed to do client nfs ganesha "
                                 "configuration")

        def _hosts_entry_cmd(host):
            # Shell snippet appending "<ip> <host>" to /etc/hosts only when
            # grep finds no line mentioning the host
            return ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then "
                    "echo \"%s %s\" >> /etc/hosts; fi" %
                    (host, socket.gethostbyname(host), host))

        # Failures below are logged, they do not abort the setup
        for server in cls.servers:
            for client in cls.clients:
                ret, _, _ = g.run(server, _hosts_entry_cmd(client))
                if ret != 0:
                    g.log.error("Failed to add entry of client %s in "
                                "/etc/hosts of server %s", client, server)

        for client in cls.clients:
            for server in cls.servers:
                ret, _, _ = g.run(client, _hosts_entry_cmd(server))
                if ret != 0:
                    g.log.error("Failed to add entry of server %s in "
                                "/etc/hosts of client %s", server, client)
    def test_peer_detach_host(self):
        """
        Verify peer detach behaviour for valid and invalid targets.

        - Detach a specified server from the cluster
        - Detach the already detached server again (must fail)
        - Probe the detached server back
        - Detach an invalid host (must fail)
        - Detach a non existing host (must fail)
        - Check that no core file was created
        - Create a volume, then detach a node hosting its bricks
          (must fail with the "Brick(s) with the peer" message)
        - Same detach with the force option (must fail the same way)
        """
        # Timestamp of current test case of start time
        _, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Assigning non existing host to variable
        self.non_exist_host = '256.256.256.256'

        # Assigning invalid ip to variable
        self.invalid_ip = '10.11.a'

        # Peer detach to specified server
        g.log.info("Start detach specified server :%s", self.servers[1])
        ret, _, _ = peer_detach(self.mnode, self.servers[1])
        self.assertEqual(ret, 0,
                         "Failed to detach server :%s" % self.servers[1])

        # Detached server detaching again, Expected to fail detach
        g.log.info("Start detached server detaching again : %s",
                   self.servers[1])
        ret, _, _ = peer_detach(self.mnode, self.servers[1])
        self.assertNotEqual(
            ret, 0, "Detach server should "
            "fail :%s" % self.servers[1])

        # Probing detached server
        g.log.info("Start probing detached server : %s", self.servers[1])
        ret = peer_probe_servers(self.mnode, self.servers[1])
        self.assertTrue(
            ret, "Peer probe failed from %s to other "
            "server : %s" % (self.mnode, self.servers[1]))

        # Detach invalid host
        g.log.info("Start detaching invalid host :%s ", self.invalid_ip)
        ret, _, _ = peer_detach(self.mnode, self.invalid_ip)
        self.assertNotEqual(
            ret, 0, "Detach invalid host should "
            "fail :%s" % self.invalid_ip)

        # Detach non exist host
        g.log.info("Start detaching non exist host : %s", self.non_exist_host)
        ret, _, _ = peer_detach(self.mnode, self.non_exist_host)
        self.assertNotEqual(
            ret, 0, "Detach non existing host "
            "should fail :%s" % self.non_exist_host)

        # Checking whether a core file was created since the test started
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")

        # Creating Volume
        g.log.info("Started creating volume: %s", self.volname)
        ret = self.setup_volume()
        self.assertTrue(ret, "Volume creation failed: %s" % self.volname)

        # Both detach attempts below must be refused with this message
        bricks_exist_msg = ('peer detach: failed: Brick(s) with the peer ' +
                            self.servers[1] + ' ' + 'exist in cluster')

        # Peer detach one node which contains the bricks of the volume created
        g.log.info("Start detaching server %s which is hosting "
                   "bricks of a volume", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0,
                            "detach server should fail: %s" % self.servers[1])
        self.assertIn(bricks_exist_msg, err, "Peer detach not failed with "
                      "proper error message")

        #  Peer detach force a node which is hosting bricks of a volume
        g.log.info("start detaching server %s with force option "
                   "which is hosting bricks of a volume", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1], force=True)
        self.assertNotEqual(
            ret, 0, "detach server should fail with force "
            "option : %s" % self.servers[1])
        self.assertIn(
            bricks_exist_msg, err, "Peer detach not failed with proper "
            "error message with force option")
    def test_profile_operations(self):
        """
        Test Case:
        1) Create a volume and start it.
        2) Mount volume on client and start IO.
        3) Start profile on the volume.
        4) Run profile info and see if all bricks are present or not.
        5) Create another volume.
        6) Run profile info without starting the profile.
        7) Run profile info with all possible options without starting
           the profile.
        8) Check for core files and that glusterd is still running.

        Note: the test renames self.volume / self.volname to "volume_2"
        for the second half and does not restore them.
        """
        # Timestamp of current test case of start time
        _, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Start IO on mount points.
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--dirname-start-num %d "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Getting and checking output of profile info.
        ret, out, _ = profile_info(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to run profile info on volume: %s" % self.volname)
        g.log.info("Successfully executed profile info on volume: %s",
                   self.volname)

        # Checking if all bricks are present in profile info.
        brick_list = get_all_bricks(self.mnode, self.volname)
        # Fail with a clear message instead of a TypeError from iterating
        # None when the brick list cannot be fetched
        self.assertIsNotNone(brick_list, "Failed to get brick list")
        for brick in brick_list:
            self.assertIn(
                brick, out,
                "Brick %s not a part of profile info output." % brick)
            g.log.info("Brick %s showing in profile info output.", brick)

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("IO validation complete.")

        # Create and start a volume
        self.volume['name'] = "volume_2"
        self.volname = "volume_2"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Successfully created and started volume_2")

        # Check profile info on volume without starting profile
        ret, _, _ = profile_info(self.mnode, self.volname)
        self.assertNotEqual(
            ret, 0, "Unexpected:Successfully ran profile info"
            " on volume: %s" % self.volname)
        g.log.info("Expected: Failed to run pofile info on volume: %s",
                   self.volname)

        # Running profile info with different profile options.
        profile_options = ('peek', 'incremental', 'clear', 'incremental peek',
                           'cumulative')
        for option in profile_options:
            # Getting and checking output of profile info.
            ret, _, _ = profile_info(self.mnode, self.volname, options=option)
            self.assertNotEqual(
                ret, 0, "Unexpected: Successfully ran profile info"
                " %s on volume: %s" % (option, self.volname))
            g.log.info(
                "Expected: Failed to execute profile info %s on"
                " volume: %s", option, self.volname)

        # Checking for core files.
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")

        # Checking whether glusterd is running or not
        ret = is_glusterd_running(self.servers)
        self.assertEqual(ret, 0, "Glusterd has crashed on nodes.")
        g.log.info("No glusterd crashes observed.")
Exemple #26
0
    def test_peer_probe_when_glusterd_down(self):
        # pylint: disable=too-many-statements
        '''
        Peer probe of a valid node must fail cleanly while that node's
        glusterd is stopped or the node is rebooting, and must succeed
        once the node is back. A core file check closes the test.

        Ref: BZ#1257394 Provide meaningful error on peer probe and peer detach
        Test Steps:
        1 detach one of the valid nodes which is already part of cluster
        2 stop glusterd on that node
        3 probe the node by IP, which must fail with the Transport
          endpoint error
        4 probe the node by hostname, same result expected
        5 start glusterd on that node
        6 reboot the node
        7 probe the rebooting node by IP and by hostname, both must fail
          and the only error accepted is
          "peer probe: failed: Probe returned with Transport endpoint is not
          connected"
        8 wait for the node and its glusterd, then probe it successfully
        9 check for core files
        '''
        target = self.servers[1]
        not_connected = ("peer probe: failed: Probe returned with Transport "
                         "endpoint is not connected\n")

        _, started_at, _ = g.run_local('date +%s')
        started_at = started_at.strip()

        # Take the node out of the cluster; a failure is tolerated only
        # when the node was not a cluster member to begin with
        g.log.info("detaching server %s ", target)
        rcode, _, stderr = peer_detach(self.mnode, target)
        not_member = ('peer detach: failed: %s is not part of cluster\n'
                      % target)
        if rcode:
            self.assertEqual(stderr, not_member,
                             "Failed to detach %s " % (target))

        # Stop glusterd on the detached node
        g.log.info("Stopping glusterd on %s ", target)
        stopped = stop_glusterd(target)
        self.assertTrue(stopped, "Fail to stop glusterd on %s " % target)

        # Probe by IP while glusterd is down
        g.log.info("Peer probing %s when glusterd down ", target)
        rcode, _, stderr = peer_probe(self.mnode, target)
        self.assertNotEqual(
            rcode, 0, "Peer probe should not pass when glusterd is down")
        self.assertEqual(stderr, not_connected)

        # Probe by hostname while glusterd is down
        g.log.info("Peer probing node %s using hostname with glusterd down ",
                   target)
        _, hostname_out, _ = g.run(target, "hostname")
        target_hostname = hostname_out.strip()
        rcode, _, stderr = peer_probe(self.mnode, target_hostname)
        self.assertNotEqual(
            rcode, 0, "Peer probe should not pass when glusterd is down")
        self.assertEqual(stderr, not_connected)

        # Bring glusterd back for the reboot part of the test
        g.log.info("starting glusterd on %s ", target)
        started = start_glusterd(target)
        self.assertTrue(
            started, "glusterd couldn't start successfully on %s" % target)

        # Issue the reboot asynchronously and probe while it is in progress
        g.log.info("Rebooting %s and checking peer probe", target)
        reboot_proc = g.run_async(target, "reboot")

        # Mandatory sleep for 3 seconds to make sure node is in halted state
        sleep(3)

        # Probe by IP while the node is not yet online
        g.log.info("Peer probing node %s which has been issued a reboot ",
                   target)
        rcode, _, stderr = peer_probe(self.mnode, target)
        self.assertNotEqual(
            rcode, 0, "Peer probe passed when it was expected to fail")
        self.assertEqual(stderr, not_connected)

        # Probe by hostname while the node is not yet online
        g.log.info("Peer probing node %s using hostname which is still "
                   "not online ", target)
        rcode, _, stderr = peer_probe(self.mnode, target_hostname)
        self.assertNotEqual(
            rcode, 0,
            "Peer probe should not pass when node has not come online")
        self.assertEqual(stderr, not_connected)

        # The reboot command itself is expected to end with status 255
        rcode, _, _ = reboot_proc.async_communicate()
        self.assertEqual(rcode, 255, "reboot failed")

        # Poll every 15 seconds, at most 40 times, for the node to return
        for _ in range(40):
            sleep(15)
            online, _ = are_nodes_online(target)
            if online:
                g.log.info("Node %s is online", target)
                break
        self.assertTrue(online, "Node in test not yet online")

        # glusterd has to be running again after the reboot
        running = wait_for_glusterd_to_start(
            target, glusterd_start_wait_timeout=120)
        self.assertTrue(running,
                        "Glusterd service is not running post reboot")

        # With the node back, the probe must succeed
        g.log.info("peer probing node %s", target)
        rcode, _, stderr = peer_probe(self.mnode, target)
        self.assertEqual(
            rcode, 0,
            "Peer probe has failed unexpectedly with %s " % stderr)

        # Core file check on all servers
        no_core = is_core_file_created(self.servers, started_at)
        self.assertTrue(no_core, "core file found")
Exemple #27
0
def main():
    """Collect sos-reports from the configured servers and upload them.

    The path of a yaml config file is read from ``sys.argv[1]``.  The parsed
    config is expected to provide ``servers`` (an iterable of hosts) and
    ``repo`` (a sequence whose first entry has the form ``<host>:<path>``).

    For every server a sos-report is generated, downloaded into a
    ``sosreport`` directory next to this script, copied to the repo with
    ``sshpass``/``scp`` and verified by comparing the checksum printed by
    ``sosreport`` with the ``md5sum`` of the uploaded copy.  Finally
    ``chmod -R 755`` is applied to the repo path.

    Exits with status 1 when no yaml file is given, when the password cannot
    be read, when the repo directory cannot be created or when a checksum
    does not match.

    Raises:
        AssertionError: if generating a sos-report or the final ``chmod``
            returns a non-zero exit status.
    """
    if len(sys.argv) != 2:
        print("Kindly provide yaml file")
        # A usage error must not be reported as success.
        sys.exit(1)
    yaml_file = sys.argv[1]

    # shlex.quote exists on Python 3, pipes.quote is its Python 2 spelling.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    config_dict = parser(yaml_file)
    servers = config_dict.get('servers')
    qe_repo = config_dict.get('repo')
    qe_host, qe_host_path = qe_repo[0].split(':')

    # Local path for sos-report
    dir_path = os.path.dirname(os.path.realpath(__file__)) + '/sosreport'
    if not os.path.isdir(dir_path):
        os.mkdir(dir_path)

    # Create dir in qe repo
    # Using sshpass as the default repo has blocked passwordless login
    try:
        raw_password = getpass.getpass(
            prompt="Enter password for host %s \n" % qe_host)
    except Exception as error:
        # Without a password none of the sshpass commands below can work.
        print('ERROR', error)
        sys.exit(1)
    # The password ends up inside shell command lines; quote it so that an
    # empty password or one with shell metacharacters is passed verbatim.
    password = quote(raw_password)

    command = 'sshpass -p %s ssh qe@%s "mkdir -p %s"' % (password, qe_host,
                                                         qe_host_path)
    ret, _, err = g.run_local(command)
    if ret != 0:
        # Fail early (e.g. wrong password) instead of generating reports
        # that can never be uploaded.
        print("Failed to create %s on %s: %s" % (qe_host_path, qe_host, err))
        sys.exit(1)

    print("Starting to generate sos-report")
    for server in servers:
        # generate sos-report
        ret, output, _ = g.run(server, "echo -ne '\n\n' | sosreport")
        assert (ret == 0), "Failed to generate sosreport for %s" % server
        remote_file_path = re.findall(r'/var/tmp/sosreport[^\s]+', output)[0]
        sosreport = remote_file_path.split('/tmp/')[1]
        remote_file_checksum = re.search(r'The checksum is: (\S+)',
                                         output).group(1)

        # Download sos-report to local system
        g.download(server, remote_file_path, dir_path)
        local_file_path = dir_path + '/' + sosreport

        # Upload sos-report to repo
        # Using sshpass as the default repo has blocked passwordless login
        # (a failed upload is caught by the checksum comparison below)
        command = 'sshpass -p %s scp -r %s qe@%s' % (password,
                                                     local_file_path,
                                                     qe_repo[0])
        g.run_local(command)

        # Getting md5sum of the report from remote repo
        command = ('sshpass -p %s ssh qe@%s "md5sum %s/%s"' %
                   (password, qe_host, qe_host_path, sosreport))
        _, output, _ = g.run_local(command)
        md5sum = output.split(" ")[0]

        # Match the md5sum with that of original
        if remote_file_checksum != md5sum:
            print("checksum match failed for %s" % sosreport)
            sys.exit(1)
        print("Sos-report %s copied successfully and checksum matches" %
              sosreport)

    # Change permissions of qe repo
    command = ('sshpass -p %s ssh qe@%s "chmod -R 755 %s"' %
               (password, qe_host, qe_host_path))
    ret, _, _ = g.run_local(command)
    assert (ret == 0), "Failed to change permission for %s" % qe_host_path
    print("Successfully changed permissions for  %s:%s" %
          (qe_host, qe_host_path))
    def test_peer_detach_host(self):
        # pylint: disable = too-many-statements
        """
        Verify peer detach for valid, invalid and brick-hosting peers.

        Test Steps:
        1 Detach self.servers[1] from the cluster, which must pass
        2 Detach the same server again, which must fail with
          "peer detach: failed: <server> is not part of cluster"
        3 Probe the detached server back into the cluster
        4 Detach an invalid IP and a non existing host, both must fail
        5 Create a volume and run check_detach_error_message() without
          and with force
        6 Stop the volume and run check_detach_error_message() again
          without and with force
        7 Start the volume again for proper cleanup
        8 Make sure no core file was created during the test
        """

        # Timestamp of current test case of start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Assigning non existing host to variable
        self.non_exist_host = '256.256.256.256'

        # Assigning invalid ip to variable
        self.invalid_ip = '10.11.a'

        # Peer detach to specified server
        g.log.info("Start detach specified server :%s", self.servers[1])
        ret, _, _ = peer_detach(self.mnode, self.servers[1])
        self.assertEqual(ret, 0,
                         "Failed to detach server :%s" % self.servers[1])

        # Detached server detaching again, Expected to fail detach
        g.log.info("Start detached server detaching "
                   "again : %s", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1])
        self.assertNotEqual(
            ret, 0, "Detach server should "
            "fail :%s" % self.servers[1])
        self.assertEqual(
            err, "peer detach: failed: %s is not part of "
            "cluster\n" % self.servers[1], "Peer "
            "Detach didn't fail as expected")

        # Probing detached server
        g.log.info("Start probing detached server : %s", self.servers[1])
        ret = peer_probe_servers(self.mnode, self.servers[1])
        self.assertTrue(
            ret, "Peer probe failed from %s to other "
            "server : %s" % (self.mnode, self.servers[1]))

        # Detach invalid host
        g.log.info("Start detaching invalid host :%s ", self.invalid_ip)
        ret, _, _ = peer_detach(self.mnode, self.invalid_ip)
        self.assertNotEqual(
            ret, 0, "Detach invalid host should "
            "fail :%s" % self.invalid_ip)

        # Detach non exist host
        g.log.info("Start detaching non exist host : %s", self.non_exist_host)
        ret, _, _ = peer_detach(self.mnode, self.non_exist_host)
        self.assertNotEqual(
            ret, 0, "Detach non existing host "
            "should fail :%s" % self.non_exist_host)

        # Creating Volume
        g.log.info("Started creating volume: %s", self.volname)
        ret = self.setup_volume()
        self.assertTrue(ret, "Volume creation failed: %s" % self.volname)

        # Peer detach one node which contains the bricks of the volume created
        g.log.info(
            "Start detaching server %s which is hosting "
            "bricks of a volume", self.servers[1])
        self.check_detach_error_message(use_force=False)

        # Peer detach force a node which is hosting bricks of a volume
        # NOTE(review): the helper is called without arguments for the force
        # variant; its default is not visible here - confirm it is force.
        g.log.info(
            "Start detaching server using force %s which is hosting "
            "bricks of a volume", self.servers[1])
        self.check_detach_error_message()

        # Peer detach one node which contains bricks of an offline volume
        g.log.info("stopping the volume")
        ret, _, err = volume_stop(self.mnode, self.volname)
        # A volume that is already stopped is acceptable here; any other
        # outcome must be a successful stop.
        msg = ('volume stop: %s: failed: Volume %s is not in the started '
               'state\n' % (self.volname, self.volname))
        if msg not in err:
            self.assertEqual(ret, 0,
                             "stopping volume %s failed" % self.volname)
        g.log.info(
            "Start to detach server %s which is hosting "
            "bricks of an offline volume", self.servers[1])
        self.check_detach_error_message(use_force=False)

        # Forceful Peer detach node which hosts bricks of offline volume
        g.log.info(
            "start detaching server %s with force option "
            "which is hosting bricks of a volume", self.servers[1])
        self.check_detach_error_message()

        # starting volume for proper cleanup
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "volume start failed")

        # Checking whether a core file was created since the test started.
        # The assertion and log below treat a True result as "no core file".
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")
    def test_peer_probe_when_glusterd_down(self):
        # pylint: disable=too-many-statements
        """
        Peer probing an unreachable node must fail with a clear error.

        Ref: BZ#1257394 Provide meaningful error on peer probe and peer detach
        Test Steps:
        1 detach self.servers[1] (an "is not part of cluster" failure is
          tolerated)
        2 stop glusterd on that node
        3 peer probe it by IP and by hostname, both must fail with
          "peer probe: failed: Probe returned with Transport endpoint is not
          connected"
        4 start glusterd on that node again
        5 bring its network interface down through
          bring_down_network_interface()
        6 peer probe it by IP and by hostname again, the first line of the
          error must be the same Transport endpoint message
        7 wait for the network interface operation to finish
        8 peer probe the node, which must now pass
        9 make sure no core file was created during the test
        """
        transport_err = ("peer probe: failed: Probe returned with "
                         "Transport endpoint is not connected")

        _, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        node = self.servers[1]

        # Take the node out of the cluster; it is fine if it already was out
        g.log.info("detaching server %s ", node)
        detach_rc, _, detach_err = peer_detach(self.mnode, node)
        if detach_rc:
            not_in_cluster = ('peer detach: failed: %s is not part of '
                              'cluster\n' % node)
            self.assertEqual(detach_err, not_in_cluster,
                             "Failed to detach %s " % node)

        # glusterd must be down on the detached node for the first probes
        g.log.info("Stopping glusterd on %s ", node)
        stopped = stop_glusterd(node)
        self.assertTrue(stopped, "Fail to stop glusterd on %s " % node)

        # Probe by IP while glusterd is down
        g.log.info("Peer probing %s when glusterd down ", node)
        probe_rc, _, probe_err = peer_probe(self.mnode, node)
        self.assertNotEqual(
            probe_rc, 0, "Peer probe should not pass when glusterd is down")
        self.assertEqual(probe_err, transport_err + "\n")

        # Probe by hostname while glusterd is down
        g.log.info("Peer probing node %s using hostname with glusterd down ",
                   node)
        node_hostname = g.run(node, "hostname")[1].strip()
        probe_rc, _, probe_err = peer_probe(self.mnode, node_hostname)
        self.assertNotEqual(
            probe_rc, 0, "Peer probe should not pass when glusterd is down")
        self.assertEqual(probe_err, transport_err + "\n")

        # glusterd has to run again for the network related steps
        g.log.info("starting glusterd on %s ", node)
        started = start_glusterd(node)
        self.assertTrue(
            started, "glusterd couldn't start successfully on %s" % node)

        # Take the network down for a while; the returned handle is waited
        # on further below.
        # NOTE(review): 150 is presumably a duration in seconds - confirm.
        network_status = bring_down_network_interface(node, 150)

        # Probe by IP while the node is unreachable; only the first line of
        # the error output is compared here
        g.log.info("Peer probing node %s when network is down", node)
        probe_rc, _, probe_err = peer_probe(self.mnode, node)
        self.assertNotEqual(
            probe_rc, 0, "Peer probe passed when it was expected to fail")
        self.assertEqual(probe_err.split("\n")[0], transport_err)

        # Probe by hostname while the node is unreachable
        g.log.info(
            "Peer probing node %s using hostname which is still not online ",
            node)
        probe_rc, _, probe_err = peer_probe(self.mnode, node_hostname)
        self.assertNotEqual(
            probe_rc, 0,
            "Peer probe should not pass when node has not come online")
        self.assertEqual(probe_err.split("\n")[0], transport_err)

        # Wait for the interface operation; a failure is only logged
        net_rc, _, _ = network_status.async_communicate()
        if net_rc != 0:
            g.log.error("Failed to perform network interface ops")

        # With the node reachable again the probe has to succeed
        g.log.info("peer probing node %s", node)
        probe_rc, _, probe_err = peer_probe(self.mnode, node)
        self.assertEqual(
            probe_rc, 0,
            "Peer probe has failed unexpectedly with %s " % probe_err)

        # Finally look for core files created since the test started
        no_core = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(no_core, "core file found")
Exemple #30
0
    def test_profile_simultaneously_on_different_nodes(self):
        """
        Run volume status and profile info concurrently on different nodes.

        Test Case:
        1) Create a volume and start it.
        2) Mount volume on client and start IO.
        3) Start profile on the volume.
        4) Create another volume.
        5) Start profile on the volume.
        6) Run volume status in a loop in one of the node.
        7) Run profile info for the new volume on one of the other node
        8) Run profile info for the new volume in loop for 100 times on
           the other node
        9) Check that no core file was created and glusterd is running.

        Uses self.servers[1] and self.servers[3], so at least four servers
        are required.
        """
        # Timestamp of current test case of start time
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Start IO on mount points; every mount gets its own
        # --dirname-start-num, counted from 1.
        self.all_mounts_procs = []
        for counter, mount_obj in enumerate(self.mounts, start=1):
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--dirname-start-num %d "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("IO validation complete.")

        # Create and start a second volume; self.volname now refers to it
        # for the rest of the test.
        self.volume['name'] = "volume_2"
        self.volname = "volume_2"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Successfully created and started volume_2")

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Run volume status on one of the node in loop (in the background)
        cmd = "for i in `seq 1 100`;do gluster v status;done"
        proc1 = g.run_async(self.servers[1], cmd)

        # Check profile on one of the other node
        cmd = "gluster v profile %s info" % self.volname
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(
            ret, 0, "Failed to run profile info on volume: %s"
            " on node %s" % (self.volname, self.mnode))
        g.log.info("Successfully run profile info on volume: %s on node %s",
                   self.volname, self.mnode)

        # Run volume profile info on one of the other node in loop
        cmd = """for i in `seq 1 100`;do gluster v profile %s info;
              done""" % self.volname
        proc2 = g.run_async(self.servers[3], cmd)

        # Wait for both background loops before judging their results
        ret1, _, _ = proc1.async_communicate()
        ret2, _, _ = proc2.async_communicate()

        self.assertEqual(
            ret1, 0, "Failed to run volume status in a loop"
            " on node %s" % self.servers[1])
        g.log.info(
            "Successfully running volume status in a loop on node"
            " %s", self.servers[1])

        self.assertEqual(
            ret2, 0, "Failed to run profile info in a loop"
            " on node %s" % self.servers[3])
        g.log.info(
            "Successfully running profile info in a loop on node"
            " %s", self.servers[3])

        # Checking for core files.
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "glusterd service should not crash")
        g.log.info("No core file found, glusterd service running "
                   "successfully")

        # Checking whether glusterd is running or not
        ret = is_glusterd_running(self.servers)
        self.assertEqual(ret, 0, "Glusterd has crashed on nodes.")
        g.log.info("No glusterd crashes observed.")