Ejemplo n.º 1
0
Archivo: test.py Proyecto: guoanwu/daos
class TestWithServers(TestWithoutServers):
    """Run tests with DAOS servers and at least one client.

    Optionally run DAOS clients on specified hosts.  By default run a single
    DAOS client on the host executing the test.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Create a TestWithServers object with its default state.

        All positional and keyword arguments are forwarded unchanged to the
        parent class initializer.
        """
        super(TestWithServers, self).__init__(*args, **kwargs)

        # Server managers are appended by start_servers(); the agent sessions
        # are assigned by setUp() when the agents are started.
        self.server_managers = []
        self.agent_sessions = None

        # Flags checked by setUp() to decide whether the agents and servers
        # are started automatically.
        self.setup_start_agents = True
        self.setup_start_servers = True

        # Log file names stay unset until update_log_file_names() is called
        self.agent_log = self.server_log = self.control_log = None
        self.helper_log = self.client_log = None

        # Directory portion of the D_LOG_FILE path (/tmp when it is not set)
        daos_log_file = os.getenv("D_LOG_FILE", "/tmp/server.log")
        self.log_dir = os.path.dirname(daos_log_file)

        # Identify the test by its file name and the str_uid of its name
        self.test_id = "{}-{}".format(
            os.path.basename(self.filename), self.name.str_uid)

    def setUp(self):
        """Prepare the hosts, agents, and servers for each test case.

        After the inherited set up, the server and client host lists are read
        from the test yaml (or from slurm partitions when those are defined),
        a configuration matching the test requirements is selected, any
        requested host counts are verified, the client host file is written,
        and finally the agents and servers are started if the corresponding
        setup_start_agents / setup_start_servers flags are set.
        """
        super(TestWithServers, self).setUp()

        self.server_group = self.params.get(
            "name", "/server_config/", "daos_server")

        # Read the host lists and the optional counts that restrict a test to
        # a subset of the listed hosts.
        hosts_path = "/run/hosts/*"
        servers = self.params.get("test_servers", hosts_path)
        clients = self.params.get("test_clients", hosts_path)
        server_count = self.params.get("server_count", hosts_path)
        client_count = self.params.get("client_count", hosts_path)

        # Hosts found through a valid slurm partition name take precedence
        # over the hosts specified through the lists.
        servers, self.partition_servers = self.get_partition_hosts(
            "server_partition", servers)
        clients, self.partition_clients = self.get_partition_hosts(
            "client_partition", clients)

        # Clients are only used when servers are also specified:
        #   - test_servers [+ server_count]
        #   - test_servers [+ server_count] + test_clients [+ client_count]
        if servers:
            self.hostlist_servers = servers[:server_count]
            if clients:
                self.hostlist_clients = clients[:client_count]
        self.log.info("hostlist_servers:  %s", self.hostlist_servers)
        self.log.info("hostlist_clients:  %s", self.hostlist_clients)

        # Cancel the test if no configuration meets its requirements
        self.config = Configuration(
            self.params, self.hostlist_servers, debug=self.debug)
        if not self.config.set_config(self):
            self.cancel("Test requirements not met!")

        # When a host count is requested, the number of hosts selected must
        # match it exactly.
        host_checks = (
            ("server", server_count, self.hostlist_servers),
            ("client", client_count, self.hostlist_clients),
        )
        for host_type, required, selected in host_checks:
            available = len(selected) if selected else 0
            if required:
                self.assertEqual(
                    required, available,
                    "Test requires {} {}; {} specified".format(
                        required, host_type, available))

        # Write the client host file
        if self.hostlist_clients:
            self.hostfile_clients = write_host_file.write_host_file(
                self.hostlist_clients, self.workdir,
                self.hostfile_clients_slots)

        # Start the agents first, followed by the servers
        if self.setup_start_agents:
            self.agent_sessions = agent_utils.run_agent(
                self, self.hostlist_servers, self.hostlist_clients)
        if self.setup_start_servers:
            self.start_servers()

    def tearDown(self):
        """Release the resources used by the test case.

        Containers are destroyed first, then pools, then the agents are
        stopped, then the servers, and finally the inherited tear down is run.
        The errors reported by each step are collected so that every step is
        attempted before the test is failed with the combined list.
        """
        # Run the clean up steps in order, gathering the errors they report
        problems = list(self.destroy_containers(self.container))
        problems += self.destroy_pools(self.pool)
        problems += self.stop_agents()
        problems += self.stop_servers()

        # An OSError from the inherited tear down is reported with the rest
        try:
            super(TestWithServers, self).tearDown()
        except OSError as error:
            problems.append(
                "Error running inheritted teardown(): {}".format(error))

        # Report everything that went wrong as a single test failure
        if problems:
            self.fail("Errors detected during teardown:\n  - {}".format(
                "\n  - ".join(problems)))

    def destroy_containers(self, containers):
        """Close and destroy one or more containers.

        Args:
            containers (object): a list of or single DaosContainer or
                TestContainer object(s) to destroy

        Returns:
            list: a list of exceptions raised destroying the containers

        """
        error_list = []
        if containers:
            if not isinstance(containers, (list, tuple)):
                containers = [containers]
            self.multi_log("Destroying containers")
            for container in containers:
                # Only close a container that has been openned by the test
                if not hasattr(container, "opened") or container.opened:
                    try:
                        container.close()
                    except (DaosApiError, TestFail) as error:
                        self.multi_log("  {}".format(error))
                        error_list.append(
                            "Error closing the container: {}".format(error))

                # Only destroy a container that has been created by the test
                if not hasattr(container, "attached") or container.attached:
                    try:
                        container.destroy()
                    except (DaosApiError, TestFail) as error:
                        self.multi_log("  {}".format(error))
                        error_list.append(
                            "Error destroying container: {}".format(error))
        return error_list

    def destroy_pools(self, pools):
        """Disconnect and destroy one or more pools.

        Args:
            pools (object): a list of or single DaosPool or TestPool object(s)
                to destroy

        Returns:
            list: a list of exceptions raised destroying the pools

        """
        error_list = []
        if pools:
            if not isinstance(pools, (list, tuple)):
                pools = [pools]
            self.multi_log("Destroying pools")
            for pool in pools:
                # Only disconnect a pool that has been connected by the test
                if not hasattr(pool, "connected") or pool.connected:
                    try:
                        pool.disconnect()
                    except (DaosApiError, TestFail) as error:
                        self.multi_log("  {}".format(error))
                        error_list.append(
                            "Error disconnecting pool: {}".format(error))

                # Only destroy a pool that has been created by the test
                if not hasattr(pool, "attached") or pool.attached:
                    try:
                        pool.destroy(1)
                    except (DaosApiError, TestFail) as error:
                        self.multi_log("  {}".format(error))
                        error_list.append(
                            "Error destroying pool: {}".format(error))
        return error_list

    def stop_agents(self):
        """Stop the daos agents.

        Returns:
            list: a list of exceptions raised stopping the agents

        """
        error_list = []
        if self.agent_sessions:
            self.multi_log("Stopping agents")
            try:
                agent_utils.stop_agent(self.agent_sessions,
                                       self.hostlist_clients)
            except agent_utils.AgentFailed as error:
                self.multi_log("  {}".format(error))
                error_list.append("Error stopping agents: {}".format(error))
        return error_list

    def stop_servers(self):
        """Stop the daos server and I/O servers.

        Returns:
            list: a list of exceptions raised stopping the servers

        """
        error_list = []
        if self.server_managers:
            for server_manager in self.server_managers:
                try:
                    server_manager.stop()
                except ServerFailed as error:
                    self.multi_log("  {}".format(error))
                    error_list.append(
                        "Error stopping servers: {}".format(error))
        return error_list

    def start_servers(self, server_groups=None):
        """Start the servers for one or more server groups.

        A new ServerManager is created, configured, appended to
        self.server_managers, and started for each group.  The test is failed
        if starting the servers of any group raises a ServerFailed exception.

        Args:
            server_groups (dict, optional): dictionary mapping each server
                group name to the hosts on which to start its servers.
                Defaults to None, which starts the self.server_group group on
                the self.hostlist_servers hosts. No servers are started if
                any other non-dictionary value is specified.
        """
        if server_groups is None:
            server_groups = {self.server_group: self.hostlist_servers}
        if not isinstance(server_groups, dict):
            return

        # Each entry allows servers to be started on a different subset of
        # hosts with a different server group.
        for group, hosts in server_groups.items():
            self.log.info(
                "Starting servers: group=%s, hosts=%s", group, hosts)
            manager = ServerManager(
                self.bin, os.path.join(self.ompi_prefix, "bin"))
            self.server_managers.append(manager)
            manager.get_params(self)
            manager.runner.job.yaml_params.name = group
            manager.hosts = (
                hosts, self.workdir, self.hostfile_servers_slots)

            # When the prefix is not /usr, add PATH to the list of values
            # exported by the runner.
            if self.prefix != "/usr":
                export = manager.runner.export
                if export.value is None:
                    export.value = []
                export.value.extend(["PATH"])

            load_mpi("orterun")
            yamlfile = os.path.join(self.tmp, "daos_avocado_test.yaml")
            try:
                manager.start(yamlfile)
            except ServerFailed as error:
                self.multi_log("  {}".format(error))
                self.fail("Error starting server: {}".format(error))

    def get_partition_hosts(self, partition_key, host_list):
        """[summary].

        Args:
            partition_key ([type]): [description]
            host_list ([type]): [description]

        Returns:
            tuple: [description]

        """
        hosts = []
        partiton_name = self.params.get(partition_key, "/run/hosts/*")
        if partiton_name is not None:
            cmd = "scontrol show partition {}".format(partiton_name)

            try:
                result = process.run(cmd, shell=True, timeout=10)
            except process.CmdError as error:
                self.log.warning("Unable to obtain hosts from the {} slurm "
                                 "partition: {}".format(partiton_name, error))
                result = None
            if result:
                output = result.stdout
                try:
                    hosts = list(
                        NodeSet(re.findall(r"\s+Nodes=(.*)", output)[0]))
                except (NodeSetParseError, IndexError):
                    self.log.warning(
                        "Unable to obtain hosts from the {} slurm partition "
                        "output: {}".format(partiton_name, output))

        if hosts:
            return hosts, partiton_name
        else:
            return host_list, None

    def update_log_file_names(self, test_name=None):
        """Define agent, server, and client log files that include the test id.

        Args:
            test_name (str, optional): name of test variant
        """
        if test_name:
            # Overwrite the test id with the specified test name
            self.test_id = test_name

        # Update the log file names.  The path is defined throught the
        # DAOS_TEST_LOG_DIR environment variable.
        self.agent_log = "{}_daos_agent.log".format(self.test_id)
        self.server_log = "{}_daos_server.log".format(self.test_id)
        self.control_log = "{}_daos_control.log".format(self.test_id)
        self.helper_log = "{}_daos_admin.log".format(self.test_id)
        self.client_log = "{}_daos_client.log".format(self.test_id)

    def get_dmg_command(self, index=0):
        """Get a DmgCommand set up to interact with a server manager.

        The returned DmgCommand object is configured with:
            - its hostlist set to the access points value from the yaml
              parameters of the selected server manager's job
            - its insecure setting copied from the selected server manager

        This method is intended for tests that want to use dmg to create and
        destroy pools; the returned object can be passed to the TestPool
        constructor.

        Args:
            index (int, optional): index of the server manager in
                self.server_managers. Defaults to 0.

        Returns:
            DmgCommand: New DmgCommand object.

        """
        command = DmgCommand(self.bin)
        manager = self.server_managers[index]
        yaml_params = manager.runner.job.yaml_params
        command.hostlist.value = yaml_params.access_points.value
        command.insecure.value = manager.insecure.value
        return command

    def prepare_pool(self):
        """Create and connect to a pool defined by the test yaml.

        A TestPool using the dmg command from get_dmg_command() is assigned to
        self.pool, its parameters are read from the test, and the pool is
        then created and connected.

        This sequence is common for a lot of the container tests.
        """
        dmg = self.get_dmg_command()
        # Assign the pool before it is configured and created so it is
        # available as self.pool even if a later step raises.
        self.pool = TestPool(self.context, dmg_command=dmg)
        pool = self.pool
        pool.get_params(self)
        pool.create()
        pool.connect()
Ejemplo n.º 2
0
    def setUp(self):
        """Prepare the hosts, agents, and servers for each test case.

        After the inherited set up, the server and client host lists are read
        from the test yaml (or from slurm partitions when those are defined),
        a configuration matching the test requirements is selected, any
        requested host counts are verified, the client host file is written,
        and finally the agents and servers are started if the corresponding
        setup_start_agents / setup_start_servers flags are set.
        """
        super(TestWithServers, self).setUp()

        self.server_group = self.params.get(
            "name", "/server_config/", "daos_server")

        # Read the host lists and the optional counts that restrict a test to
        # a subset of the listed hosts.
        hosts_path = "/run/hosts/*"
        servers = self.params.get("test_servers", hosts_path)
        clients = self.params.get("test_clients", hosts_path)
        server_count = self.params.get("server_count", hosts_path)
        client_count = self.params.get("client_count", hosts_path)

        # Hosts found through a valid slurm partition name take precedence
        # over the hosts specified through the lists.
        servers, self.partition_servers = self.get_partition_hosts(
            "server_partition", servers)
        clients, self.partition_clients = self.get_partition_hosts(
            "client_partition", clients)

        # Clients are only used when servers are also specified:
        #   - test_servers [+ server_count]
        #   - test_servers [+ server_count] + test_clients [+ client_count]
        if servers:
            self.hostlist_servers = servers[:server_count]
            if clients:
                self.hostlist_clients = clients[:client_count]
        self.log.info("hostlist_servers:  %s", self.hostlist_servers)
        self.log.info("hostlist_clients:  %s", self.hostlist_clients)

        # Cancel the test if no configuration meets its requirements
        self.config = Configuration(
            self.params, self.hostlist_servers, debug=self.debug)
        if not self.config.set_config(self):
            self.cancel("Test requirements not met!")

        # When a host count is requested, the number of hosts selected must
        # match it exactly.
        host_checks = (
            ("server", server_count, self.hostlist_servers),
            ("client", client_count, self.hostlist_clients),
        )
        for host_type, required, selected in host_checks:
            available = len(selected) if selected else 0
            if required:
                self.assertEqual(
                    required, available,
                    "Test requires {} {}; {} specified".format(
                        required, host_type, available))

        # Write the client host file
        if self.hostlist_clients:
            self.hostfile_clients = write_host_file.write_host_file(
                self.hostlist_clients, self.workdir,
                self.hostfile_clients_slots)

        # Start the agents first, followed by the servers
        if self.setup_start_agents:
            self.agent_sessions = agent_utils.run_agent(
                self, self.hostlist_servers, self.hostlist_clients)
        if self.setup_start_servers:
            self.start_servers()