Esempio n. 1
0
    def _get_collectibles(self, c_profiler):
        """Load the sysinfo collectibles and decide whether profiling is on.

        ``self.sysinfo_files`` is filled from ``gather_collectibles_config``
        and gets an extra ``"profilers"`` entry read from the file named by
        the ``sysinfo.collectibles.profilers`` setting (empty list when that
        file does not exist).

        :param c_profiler: explicit profiler setting; when None the
                           ``sysinfo.collect.profiler`` setting is used
        """
        self.sysinfo_files = gather_collectibles_config(self.config)

        # An explicit argument takes precedence over the configuration.
        self.profiler = (self.config.get("sysinfo.collect.profiler")
                         if c_profiler is None else c_profiler)

        profiler_file = self.config.get("sysinfo.collectibles.profilers")
        if not os.path.isfile(profiler_file):
            log.debug("File %s does not exist.", profiler_file)
            self.sysinfo_files["profilers"] = []
            return

        commands = genio.read_all_lines(profiler_file)
        self.sysinfo_files["profilers"] = commands
        log.info("Profilers configured by file: %s", profiler_file)
        if not commands:
            # Nothing to run, so profiling is switched off regardless of
            # what was requested.
            self.profiler = False

        if self.profiler is False:
            if commands:
                log.info("Profiler disabled")
            else:
                log.info("Profiler disabled: no profiler"
                         " commands configured")
Esempio n. 2
0
def runServer(hostfile, setname, basepath):
    """Launch DAOS servers through orterun inside a shell session.

    The shell session is stored in the module-level ``sessions`` mapping
    under ``setname``.

    Args:
        hostfile: path of the hostfile; orterun is asked for one process per
            line read from it.
        setname: server group name, also the key used in ``sessions``.
        basepath: root of the tree that contains the ``install`` directory.

    Raises:
        ServerFailed: if anything goes wrong while starting the servers,
            including a shell session that does not respond.
    """
    global sessions
    try:
        server_count = len(genio.read_all_lines(hostfile))

        initial_cmd = "/bin/sh"
        server_cmd = basepath + "/install/bin/orterun --np {0} ".format(
            server_count)
        server_cmd += "--hostfile {0} --enable-recovery ".format(hostfile)
        server_cmd += "-x D_LOG_MASK=DEBUG,RPC=ERR,MEM=ERR -x D_LOG_FILE="
        server_cmd += basepath + "/install/tmp/daos.log "
        server_cmd += "-x LD_LIBRARY_PATH={0}/install/lib:".format(basepath)
        server_cmd += "{0}/install/lib/daos_srv ".format(basepath)
        server_cmd += basepath + "/install/bin/daos_server -g {0} -c 1 ".format(
            setname)
        server_cmd += " -a" + basepath + "/install/tmp/"

        # print() with a single argument behaves the same on Python 2 and 3.
        print("Start CMD>>>>{0}".format(server_cmd))

        sessions[setname] = aexpect.ShellSession(initial_cmd)
        if not sessions[setname].is_responsive():
            # Without a working shell nothing was launched; report it
            # instead of returning as if the servers were up.
            raise ServerFailed("Shell session is not responsive")
        sessions[setname].sendline(server_cmd)
        sessions[setname].read_until_any_line_matches(
            "DAOS server (v0.0.2) started on rank 0*",
            print_func=printFunc)
        print("<SERVER> server started")
    except Exception as e:
        print("<SERVER> Exception occurred: {0}".format(str(e)))
        raise ServerFailed("Server didn't start!")
Esempio n. 3
0
    def test_launch(self):
        """Launch a DAOS server with orterun from a bash session.

        Hostfile and URI file paths come from the test parameters; orterun
        is asked for one process per line read from the hostfile.  The test
        fails when the shell is unresponsive or any exception is raised
        while driving the session.
        """
        # Looked up like the other parameters, but not used below.
        host = self.params.get("hostname", '/tests/', "localhost")
        hostfile = self.params.get("hostfile1", '/files/', "/tmp/hostfile1")
        urifile = self.params.get("urifile", '/files/', "/tmp/urifile")

        server_count = len(genio.read_all_lines(hostfile))

        # Every fragment carries its own trailing space where one is needed.
        launch_cmd = "".join([
            "../../install/bin/orterun --np {0} ".format(server_count),
            "--hostfile {0} --enable-recovery ".format(hostfile),
            "--report-uri {0} "
            "-x D_LOG_FILE=/mnt/shared/test/tmp/daos.log ".format(urifile),
            "-x LD_LIBRARY_PATH=/home/skirvan/daos_m10/install/lib"
            ":/home/skirvan/daos_m10/install/lib/daos_srv ",
            "../../install/bin/daos_server -d /tmp/.daos -g daos_server",
        ])

        try:
            session = aexpect.ShellSession("/bin/bash")
            if not session.is_responsive():
                self.fail("Server did not start.\n")
            else:
                session.sendline(launch_cmd)
                session.read_until_any_line_matches(
                    "XDAOS server (v0.0.2) started on rank *",
                    timeout=5.0,
                    print_func=printFunc)
        except Exception:
            self.fail("Server did not start.\n")

        # Send Ctrl-C to the shell session once the wait is over.
        session.sendcontrol("c")
Esempio n. 4
0
 def run_event(self, filename, perf_flags):
     """
     Run one perf command per line of `filename`.

     Each command is `perf_flags` immediately followed by the line and
     " sleep 1".  Commands that exit with a non-zero status are recorded
     in `self.fail_cmd`.
     """
     for event in genio.read_all_lines(filename):
         command = "%s%s sleep 1" % (perf_flags, event)
         result = process.run(command, shell=True, ignore_status=True)
         if result.exit_status == 0:
             continue
         self.fail_cmd.append(command)
Esempio n. 5
0
def runServer(hostfile, urifile):
    """Launch DAOS servers through orterun inside a bash session.

    The shell session is stored in the module-level ``session`` variable.

    Args:
        hostfile: path of the hostfile; orterun is asked for one process per
            line read from it.
        urifile: path handed to orterun via ``--report-uri``.

    Raises:
        ServerFailed: if anything goes wrong while starting the servers,
            including a shell session that does not respond.
    """
    global session
    try:
        server_count = len(genio.read_all_lines(hostfile))

        initial_cmd = "/bin/bash"
        server_cmd = "/home/skirvan/daos_m10/install/bin/orterun --np {0} ".format(
            server_count)
        server_cmd += "--hostfile {0} --enable-recovery ".format(hostfile)
        server_cmd += "--report-uri {0} -x DD_LOG=/mnt/shared/test/tmp/daos.log ".format(
            urifile)
        server_cmd += "-x LD_LIBRARY_PATH=/home/skirvan/daos_m10/install/lib:/home/skirvan/daos_m10/install/lib/daos_srv "
        server_cmd += "/home/skirvan/daos_m10/install/bin/daos_server -g daos_server"

        # print() with a single argument behaves the same on Python 2 and 3.
        print("Start CMD>>>>{0}".format(server_cmd))

        session = aexpect.ShellSession(initial_cmd)
        if not session.is_responsive():
            # Without a working shell nothing was launched; report it
            # instead of returning as if the servers were up.
            raise ServerFailed("Shell session is not responsive")
        session.sendline(server_cmd)
        session.read_until_any_line_matches(
            "DAOS server (v0.0.2) started on rank 0*",
            print_func=printFunc)
        print("<SERVER> server started")
    except Exception as e:
        print("<SERVER> Exception occurred: {0}".format(str(e)))
        raise ServerFailed("Server didn't start!")
 def test_probe(self):
     """
     Add a perf probe and fail if duplicate probes are detected.

     Both the stderr of `perf probe` and the contents of the kprobe_events
     file are handed to `self._check_duplicate_probe`; the test fails when
     `self.fail_flag` is set afterwards (presumably by that helper).
     """
     probe_result = process.run("perf probe select_task_rq_fair:0", sudo=True)
     self._check_duplicate_probe(probe_result.stderr.decode("utf-8"))
     kprobe_events = genio.read_all_lines(
         "/sys/kernel/debug/tracing/kprobe_events")
     self._check_duplicate_probe(kprobe_events)
     if not self.fail_flag:
         return
     self.fail("perf is placing multiple probes at the same location ")
Esempio n. 7
0
    def test_server_stderr(self):
        """Check that a GDB server's stderr output is captured in a file.

        A server is started and stopped right away; its stderr file must
        exist and contain the "Listening on port" line for the server port.
        """
        self.log.info('Testing server stderr collection')
        server = gdb.GDBServer()
        server.exit()
        self.assertTrue(os.path.exists(server.stderr_path))

        captured = genio.read_all_lines(server.stderr_path)
        # NOTE(review): the expected entry ends with a newline -- confirm
        # that read_all_lines keeps line terminators.
        self.assertIn("Listening on port %s\n" % server.port, captured)
Esempio n. 8
0
 def get_depend_modules(self, module):
     """
     Returns the dependent modules

     The config file is scanned line by line; for the first line whose text
     before the first '=' equals `module`, the text after the last '=' is
     returned.  None is returned when no line matches.
     """
     data_dir = os.path.abspath('')
     config_path = os.path.join(data_dir, "module_unload_load.py.data/config")
     for entry in genio.read_all_lines(config_path):
         name = entry.partition('=')[0]
         if module == name:
             return entry.rpartition('=')[2]
     return None
Esempio n. 9
0
    def test_server_stderr(self):
        """Verify that stderr of a GDB server ends up in its stderr file.

        The server is created and shut down immediately; afterwards the
        file at ``stderr_path`` has to exist and list the port the server
        was listening on.
        """
        self.log.info('Testing server stderr collection')
        gdb_server = gdb.GDBServer()
        gdb_server.exit()
        stderr_file = gdb_server.stderr_path
        self.assertTrue(os.path.exists(stderr_file))

        # NOTE(review): the expected line carries a trailing newline --
        # confirm read_all_lines does not strip it.
        expected_line = "Listening on port %s\n" % gdb_server.port
        self.assertIn(expected_line, genio.read_all_lines(stderr_file))
Esempio n. 10
0
 def built_in_module(self, module):
     """
     checking whether the given module is built_in module or not

     Every entry of /lib/modules/<self.uname>/modules.builtin is reduced to
     its file name without extension and compared with `module`.

     :param module: module name to look for
     :return: True if `module` is listed as built-in, False otherwise
     """
     path = "/lib/modules/%s/modules.builtin" % self.uname
     for each in genio.read_all_lines(path):
         # Entries are module file paths (e.g. "kernel/fs/ext4/ext4.ko"):
         # keep the last path component and cut it at the first '.'.
         # The name is derived from the text itself; it must not be
         # executed as a command.
         name = each.split('/')[-1].split('.')[0]
         if module == name:
             return True
     # Only give up once every entry has been checked.
     return False
 def sysfs_value_check(self):
     '''
     Tell whether the sysfs parameter file holds the test value.

     Returns True when self.param_value is one of the lines read from
     /sys/module/<self.module>/parameters/<self.param_name>, False
     otherwise.
     '''
     param_file = '/sys/module/%s/parameters/%s' % (self.module,
                                                    self.param_name)
     return self.param_value in genio.read_all_lines(param_file)
Esempio n. 12
0
 def run_event(self, filename, eventname):
     """
     Run `perf stat` once for every event listed in `filename`.

     :param filename: file with one event per line
     :param eventname: 'raw' to prefix each line with `-e r`, 'name' to
                       prefix it with `-e `
     :raises ValueError: if `eventname` is neither 'raw' nor 'name'

     Commands that exit with a non-zero status are appended to
     `self.fail_cmd`.
     """
     prefixes = {
         'raw': "perf stat -e r",
         'name': "perf stat -e ",
     }
     # Reject unknown kinds up front; otherwise the command prefix would be
     # undefined by the time the first event is processed.
     if eventname not in prefixes:
         raise ValueError("Unsupported event type %r, expected 'raw' or "
                          "'name'" % (eventname,))
     perf_flags = prefixes[eventname]
     for line in genio.read_all_lines(filename):
         cmd = "%s%s sleep 1" % (perf_flags, line)
         output = process.run(cmd, shell=True, ignore_status=True)
         if output.exit_status != 0:
             self.fail_cmd.append(cmd)
 def test_watch_point_check(self):
     """
     Record memory watchpoints on every kallsyms entry containing 'arg1'.

     The test fails right away when /dev/wptest is missing.  Otherwise
     `self.run_cmd()` is called once up front and once more after each
     background `perf record` that is started.
     """
     if not os.path.exists('/dev/wptest'):
         self.fail("unable to find the directory")
         return

     self.run_cmd()
     for symbol in genio.read_all_lines('/proc/kallsyms'):
         if 'arg1' not in symbol:
             continue
         # The first space-separated field is used as the hex address.
         address = symbol.split(' ')[0]
         process.run("perf record -e mem:0x%s &" % address,
                     ignore_bg_processes=True,
                     ignore_status=True)
         self.run_cmd()
Esempio n. 14
0
    def test_server_stdout(self):
        """Check that output of a program run under a GDB server is captured.

        A GDB client connects to a fresh server, runs the ``return99``
        binary and the server is stopped.  Both capture files must exist
        and the stdout capture must contain the program's output line.
        """
        self.log.info('Testing server stdout/stderr collection')
        server = gdb.GDBServer()
        client = gdb.GDB()
        client.connect(server.port)
        client.set_file(self.return99_binary_path)
        client.run()
        server.exit()

        for capture_path in (server.stdout_path, server.stderr_path):
            self.assertTrue(os.path.exists(capture_path))

        # NOTE(review): the expected line ends with a newline -- confirm
        # that read_all_lines keeps line terminators.
        self.assertIn("return 99\n", genio.read_all_lines(server.stdout_path))
Esempio n. 15
0
    def test_server_stdout(self):
        """Verify stdout/stderr collection for a binary run via GDB server.

        The ``return99`` binary is executed through a client attached to a
        new server; once the server has exited, its stdout and stderr files
        must exist and stdout must include the "return 99" line.
        """
        self.log.info('Testing server stdout/stderr collection')
        gdb_server = gdb.GDBServer()
        gdb_client = gdb.GDB()
        gdb_client.connect(gdb_server.port)
        gdb_client.set_file(self.return99_binary_path)
        gdb_client.run()
        gdb_server.exit()

        stdout_exists = os.path.exists(gdb_server.stdout_path)
        self.assertTrue(stdout_exists)
        stderr_exists = os.path.exists(gdb_server.stderr_path)
        self.assertTrue(stderr_exists)

        # NOTE(review): the expected entry carries a trailing newline --
        # confirm read_all_lines does not strip it.
        captured = genio.read_all_lines(gdb_server.stdout_path)
        self.assertIn("return 99\n", captured)
 def test_probe(self):
     """
     Fail when perf appears to place several probes at the same location.

     One point is counted when the stderr of `perf probe` mentions both
     'select_task_rq_fair' and 'select_task_rq_fair_', and one more for
     every matching line of the kprobe_events file.  Two or more points
     fail the test.
     """
     # Start from zero so the counter exists even when the stderr check
     # below does not match.
     fail_flag = 0
     output = process.run("perf probe select_task_rq_fair:15", sudo=True)
     stderr = output.stderr.decode("utf-8")
     if 'select_task_rq_fair' in stderr and 'select_task_rq_fair_' in stderr:
         fail_flag = 1
     output = genio.read_all_lines(
         "/sys/kernel/debug/tracing/kprobe_events")
     for line in output:
         if 'select_task_rq_fair' in line or 'select_task_rq_fair_' in line:
             fail_flag += 1
     if fail_flag >= 2:
         self.fail(
             "perf probe is placing multiple probe at the same location ")
Esempio n. 17
0
    def test_bsod(self):
        """Render up to 50 dmesg lines as white-on-blue text into bsod.png.

        The test returns silently when PIL cannot be imported.  When the
        pre-test dmesg sysinfo file is missing, a single placeholder line
        is drawn instead.
        """
        try:
            from PIL import Image, ImageDraw
        except ImportError:
            return

        dmesg_path = os.path.join(self.job.logdir, "sysinfo", "pre",
                                  "dmesg_-c")
        self.log.info("dmesg_path: %s", dmesg_path)
        if os.path.exists(dmesg_path):
            text = genio.read_all_lines(dmesg_path)[:50]
        else:
            text = ["DREADED BLUE SCREEN OF DEATH"]

        bsod = Image.new("RGB", (640, 480), "blue")
        draw = ImageDraw.Draw(bsod)
        # Lines start at y=2 and are spaced 12 pixels apart.
        for row, line in enumerate(text):
            draw.text((2, 2 + 12 * row), line)
        bsod.save(os.path.join(self.outputdir, "bsod.png"))
Esempio n. 18
0
def gather_collectibles_config(config):
    """Read the sysinfo collectible lists named in the configuration.

    For each of "commands", "files", "fail_commands" and "fail_files" the
    path stored under ``sysinfo.collectibles.<name>`` is read line by line;
    a missing file yields an empty list.  Entries of a ``fail_*`` list that
    also appear in its plain counterpart are dropped.

    :param config: mapping-like object providing ``get``
    :return: dict mapping each collectible name to its list of lines
    """
    collected = {}

    for name in ("commands", "files", "fail_commands", "fail_files"):
        path = config.get(f"sysinfo.collectibles.{name}")
        if not os.path.isfile(path):
            log.debug("File %s does not exist.", path)
            entries = []
        else:
            log.info("%s configured by file: %s", name.title(), path)
            entries = genio.read_all_lines(path)

        if "fail_" in name:
            # The plain list was processed earlier in this loop.
            always = collected[name.split("_")[1]]
            entries = [entry for entry in entries if entry not in always]
        collected[name] = entries
    return collected
Esempio n. 19
0
def gather_collectibles_config(config):
    """Build the dictionary of sysinfo collectibles from their list files.

    The keys are 'commands', 'files', 'fail_commands' and 'fail_files'.
    Each value holds the lines of the file configured under
    ``sysinfo.collectibles.<key>`` (an empty list when that file does not
    exist).  The ``fail_*`` lists exclude anything already present in the
    matching plain list.

    :param config: mapping-like object providing ``get``
    :return: dict of lists, one per collectible kind
    """
    result = {}

    for kind in ('commands', 'files', 'fail_commands', 'fail_files'):
        list_file = config.get(f'sysinfo.collectibles.{kind}')
        if os.path.isfile(list_file):
            log.info('%s configured by file: %s', kind.title(), list_file)
            lines = genio.read_all_lines(list_file)
        else:
            log.debug('File %s does not exist.', list_file)
            lines = []

        if 'fail_' not in kind:
            result[kind] = lines
            continue

        # 'fail_commands' -> 'commands', 'fail_files' -> 'files'
        regular = result[kind.split('_')[1]]
        result[kind] = [item for item in lines if item not in regular]
    return result
Esempio n. 20
0
    def test_bsod(self):
        """Draw the first 50 dmesg lines on a blue 640x480 image (bsod.png).

        Nothing happens when PIL is not installed.  A placeholder headline
        is drawn when the pre-test dmesg sysinfo file does not exist.
        """
        try:
            from PIL import Image, ImageDraw
        except ImportError:
            return

        sysinfo_dir = os.path.join(self.job.logdir, "sysinfo", "pre")
        dmesg_path = os.path.join(sysinfo_dir, "dmesg_-c")
        self.log.info("dmesg_path: %s", dmesg_path)
        text = ["DREADED BLUE SCREEN OF DEATH"]
        if os.path.exists(dmesg_path):
            text = genio.read_all_lines(dmesg_path)[:50]

        bsod = Image.new("RGB", (640, 480), "blue")
        canvas = ImageDraw.Draw(bsod)
        line_height = 12
        for index, line in enumerate(text):
            canvas.text((2, 2 + index * line_height), line)
        bsod.save(os.path.join(self.outputdir, "bsod.png"))
Esempio n. 21
0
    def test_launch(self):
        """
        Start a DAOS server under orterun from a bash session.

        One process per hostfile line is requested.  The test fails when
        the shell is unresponsive or aexpect reports an error while waiting
        (up to 5 seconds) for a line matching the expected pattern.
        :avocado: tags=all,wireup,full_regression,tiny,launchserver
        """
        hostfile = self.params.get("hostfile1", '/files/', "/tmp/hostfile1")
        urifile = self.params.get("urifile", '/files/', "/tmp/urifile")

        server_count = len(genio.read_all_lines(hostfile))

        # Fragments are joined with single spaces to form the command line.
        launch_cmd = " ".join([
            "../../install/bin/orterun --np {0}".format(server_count),
            "--hostfile {0} --enable-recovery".format(hostfile),
            "--report-uri {0}".format(urifile),
            "-x D_LOG_FILE=/mnt/shared/test/tmp/daos.log",
            "-x LD_LIBRARY_PATH=/home/skirvan/daos_m10/install/lib"
            ":/home/skirvan/daos_m10/install/lib/daos_srv",
            "../../install/bin/daos_server --debug start -d /tmp/.daos",
            "-g daos_server",
        ])

        try:
            session = aexpect.ShellSession("/bin/bash")
            if not session.is_responsive():
                self.fail("Server did not start.\n")
            else:
                session.sendline(launch_cmd)
                session.read_until_any_line_matches(
                    "XDAOS server (v0.0.2) started on rank *",
                    timeout=5.0,
                    print_func=print_helper)
        except (aexpect.ExpectError, aexpect.ExpectProcessTerminatedError,
                aexpect.ExpectTimeoutError, aexpect.ShellCmdError,
                aexpect.ShellError, aexpect.ShellProcessTerminatedError,
                aexpect.ShellStatusError,
                aexpect.ShellTimeoutError):
            self.fail("Server did not start.\n")

        # Send Ctrl-C to the shell session once the wait is over.
        session.sendcontrol("c")
Esempio n. 22
0
    def test_launch(self):
        """
        Launch a DAOS server through orterun in a bash session.

        orterun is asked for as many processes as the hostfile has lines.
        An unresponsive shell or any aexpect error while waiting (at most
        5 seconds) for the expected output line fails the test.
        """
        hostfile = self.params.get("hostfile1", '/files/', "/tmp/hostfile1")
        urifile = self.params.get("urifile", '/files/', "/tmp/urifile")

        server_count = len(genio.read_all_lines(hostfile))

        # Single spaces separate the fragments in the final command line.
        command_parts = [
            "../../install/bin/orterun --np {0}".format(server_count),
            "--hostfile {0} --enable-recovery".format(hostfile),
            "--report-uri {0}".format(urifile),
            "-x D_LOG_FILE=/mnt/shared/test/tmp/daos.log",
            "-x LD_LIBRARY_PATH=/home/skirvan/daos_m10/install/lib"
            ":/home/skirvan/daos_m10/install/lib/daos_srv",
            "../../install/bin/daos_server -d /tmp/.daos",
            "-g daos_server",
        ]
        launch_cmd = " ".join(command_parts)

        try:
            session = aexpect.ShellSession("/bin/bash")
            if not session.is_responsive():
                self.fail("Server did not start.\n")
            else:
                session.sendline(launch_cmd)
                session.read_until_any_line_matches(
                    "XDAOS server (v0.0.2) started on rank *",
                    timeout=5.0,
                    print_func=print_helper)
        except (aexpect.ExpectError, aexpect.ExpectProcessTerminatedError,
                aexpect.ExpectTimeoutError, aexpect.ShellCmdError,
                aexpect.ShellError, aexpect.ShellProcessTerminatedError,
                aexpect.ShellStatusError, aexpect.ShellTimeoutError):
            self.fail("Server did not start.\n")

        # Send Ctrl-C to the shell session once the wait is over.
        session.sendcontrol("c")
Esempio n. 23
0
def run_server(hostfile, setname, basepath, uri_path=None, env_dict=None):
    """
    Launches DAOS servers in accordance with the supplied hostfile.

    Args:
        hostfile (str): hostfile; the first space-separated field of every
            line is taken as a server host
        setname (str): key under which the orterun process is stored in
            SESSIONS
        basepath (str): directory containing .build_vars.json; also used to
            build the paths handed to daos_server
        uri_path (str, optional): passed to orterun as --report-uri when
            given. Defaults to None.
        env_dict (dict, optional): variables set in this process'
            environment and exported to the servers with -x.
            Defaults to None.

    Raises:
        ServerFailed: if the servers could not be started
    """
    global SESSIONS
    try:
        servers = ([
            line.split(' ')[0] for line in genio.read_all_lines(hostfile)
        ])
        server_count = len(servers)

        #Create the DAOS server configuration yaml file to pass
        #with daos_server -o <FILE_NAME>
        create_server_yaml(basepath)

        # first make sure there are no existing servers running
        kill_server(servers)

        # clean the tmpfs on the servers
        for server in servers:
            subprocess.check_call([
                'ssh', server,
                ("find /mnt/daos -mindepth 1 -maxdepth 1 "
                 "-print0 | xargs -0r rm -rf")
            ])

        # pile of build time variables
        with open(os.path.join(basepath, ".build_vars.json")) as json_vars:
            build_vars = json.load(json_vars)
        orterun_bin = os.path.join(build_vars["OMPI_PREFIX"], "bin", "orterun")
        daos_srv_bin = os.path.join(build_vars["PREFIX"], "bin", "daos_server")

        env_args = []
        # Add any user supplied environment
        if env_dict is not None:
            for key, value in env_dict.items():
                os.environ[key] = value
                env_args.extend(["-x", "{}={}".format(key, value)])

        server_cmd = [orterun_bin, "--np", str(server_count)]
        if uri_path is not None:
            server_cmd.extend(["--report-uri", uri_path])
        server_cmd.extend(["--hostfile", hostfile, "--enable-recovery"])
        server_cmd.extend(env_args)
        # For now run server in insecure mode until Certificate tests are in place
        server_cmd.extend([
            daos_srv_bin, "-i", "-a",
            os.path.join(basepath, "install", "tmp"), "-o",
            '{}/{}'.format(basepath, AVOCADO_FILE)
        ])

        print("Start CMD>>>>{0}".format(' '.join(server_cmd)))

        resource.setrlimit(resource.RLIMIT_CORE,
                           (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        SESSIONS[setname] = subprocess.Popen(server_cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
        # Switch stdout to non-blocking mode so the polling loop below
        # never hangs inside read().
        fdesc = SESSIONS[setname].stdout.fileno()
        fstat = fcntl.fcntl(fdesc, fcntl.F_GETFL)
        fcntl.fcntl(fdesc, fcntl.F_SETFL, fstat | os.O_NONBLOCK)
        timeout = 600
        start_time = time.time()
        result = 0
        pattern = "DAOS I/O server"
        expected_data = "Starting Servers\n"
        while True:
            output = ""
            try:
                output = SESSIONS[setname].stdout.read()
            except IOError as excpn:
                if excpn.errno != errno.EAGAIN:
                    raise
                # EAGAIN only means "no data yet".  Keep polling, but only
                # until the deadline: skipping the timeout check here would
                # spin forever on a server that stays silent.
                if time.time() - start_time <= timeout:
                    time.sleep(0.1)
                    continue
                # Deadline passed: fall through with empty output so the
                # check below reports the failure.
            match = re.findall(pattern, output)
            expected_data += output
            result += len(match)
            if not output or result == server_count or \
               time.time() - start_time > timeout:
                print("<SERVER>: {}".format(expected_data))
                if result != server_count:
                    raise ServerFailed("Server didn't start!")
                break
        print("<SERVER> server started and took %s seconds to start" % \
              (time.time() - start_time))
    except Exception as error:
        print("<SERVER> Exception occurred: {0}".format(str(error)))
        traceback.print_exception(error.__class__, error, sys.exc_info()[2])
        # we need to end the session now -- exit the shell
        try:
            SESSIONS[setname].send_signal(signal.SIGINT)
            time.sleep(5)
            # get the stderr
            error = SESSIONS[setname].stderr.read()
            if SESSIONS[setname].poll() is None:
                SESSIONS[setname].kill()
            retcode = SESSIONS[setname].wait()
            print("<SERVER> server start return code: {}\n" \
                  "stderr:\n{}".format(retcode, error))
        except KeyError:
            # No process was registered for this set name, nothing to stop.
            pass
        raise ServerFailed("Server didn't start!")
Esempio n. 24
0
def runServer(hostfile, setname, basepath, uri_path=None, env_dict=None):
    """
    Launches DAOS servers in accordance with the supplied hostfile.

    Args:
        hostfile (str): hostfile; the first space-separated field of every
            line is taken as a server host
        setname (str): server group name (daos_server -g); also the key
            under which the orterun process is stored in sessions
        basepath (str): directory containing .build_vars.json and the
            install tree
        uri_path (str, optional): passed to orterun as --report-uri when
            given. Defaults to None.
        env_dict (dict, optional): variables added to this process'
            environment before the environment is scanned for variables to
            export. Defaults to None.

    Raises:
        ServerFailed: if the servers could not be started
    """
    global sessions
    try:
        servers = [
            line.split(' ')[0] for line in genio.read_all_lines(hostfile)
        ]
        server_count = len(servers)

        # first make sure there are no existing servers running
        killServer(servers)

        # pile of build time variables
        with open(os.path.join(basepath, ".build_vars.json")) as json_vars:
            build_vars = json.load(json_vars)
        orterun_bin = os.path.join(build_vars["OMPI_PREFIX"], "bin", "orterun")
        daos_srv_bin = os.path.join(build_vars["PREFIX"], "bin", "daos_server")

        # before any set in env are added to env_args, add any user supplied
        # envirables to environment first
        if env_dict is not None:
            for k, v in env_dict.items():
                os.environ[k] = v

        # Environment variables whose names match one of these patterns are
        # exported to the servers with "-x".
        env_vars = [
            'CRT_.*', 'DAOS_.*', 'ABT_.*', 'DD_(STDERR|LOG)', 'D_LOG_.*',
            'OFI_.*'
        ]

        env_args = []
        for (env_var, env_val) in os.environ.items():
            for pat in env_vars:
                if re.match(pat, env_var):
                    env_args.extend(["-x", "{}={}".format(env_var, env_val)])

        server_cmd = [orterun_bin, "--np", str(server_count)]
        if uri_path is not None:
            server_cmd.extend(["--report-uri", uri_path])
        server_cmd.extend(["--hostfile", hostfile, "--enable-recovery"])
        server_cmd.extend(env_args)
        server_cmd.extend([
            "-x", "DD_SUBSYS=all", "-x", "DD_MASK=all", daos_srv_bin, "-g",
            setname, "-c", "1", "-a",
            os.path.join(basepath, "install", "tmp"), "-d",
            os.path.join(os.sep, "var", "run", "user", str(os.geteuid()))
        ])

        print("Start CMD>>>>{0}".format(' '.join(server_cmd)))

        resource.setrlimit(resource.RLIMIT_CORE,
                           (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        sessions[setname] = subprocess.Popen(server_cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
        # Switch stdout to non-blocking mode so the polling loop below
        # never hangs inside read().
        fd = sessions[setname].stdout.fileno()
        fl = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
        timeout = 600
        start_time = time.time()
        result = 0
        pattern = "DAOS I/O server"
        expected_data = "Starting Servers\n"
        while True:
            output = ""
            try:
                output = sessions[setname].stdout.read()
            except IOError as excpn:
                if excpn.errno != errno.EAGAIN:
                    raise
                # EAGAIN only means "no data yet".  Keep polling, but only
                # until the deadline: skipping the timeout check here would
                # spin forever on a server that stays silent.
                if time.time() - start_time <= timeout:
                    time.sleep(0.1)
                    continue
                # Deadline passed: fall through with empty output so the
                # check below reports the failure.
            match = re.findall(pattern, output)
            expected_data += output
            result += len(match)
            if not output or result == server_count or \
               time.time() - start_time > timeout:
                print("<SERVER>: {}".format(expected_data))
                if result != server_count:
                    raise ServerFailed("Server didn't start!")
                break
        print("<SERVER> server started and took %s seconds to start" % \
              (time.time() - start_time))
    except Exception as excpn:
        print("<SERVER> Exception occurred: {0}".format(str(excpn)))
        # we need to end the session now -- exit the shell
        try:
            sessions[setname].send_signal(signal.SIGINT)
            time.sleep(5)
            # get the stderr
            error = sessions[setname].stderr.read()
            if sessions[setname].poll() is None:
                sessions[setname].kill()
            retcode = sessions[setname].wait()
            print("<SERVER> server start return code: {}\n" \
                  "stderr:\n{}".format(retcode, error))
        except KeyError:
            # No process was registered for this set name, nothing to stop.
            pass
        raise ServerFailed("Server didn't start!")
Esempio n. 25
0
def run_server(test,
               hostfile,
               setname,
               uri_path=None,
               env_dict=None,
               clean=True):
    # pylint: disable=unused-argument
    """Launch DAOS servers in accordance with the supplied hostfile.

    Args:
        test (Test): avocado Test object
        hostfile (str): hostfile defining on which hosts to start servers
        setname (str): session name
        uri_path (str, optional): path to uri file. Defaults to None.
        env_dict (dict, optional): dictionary on env variable names and values.
            Defaults to None.
        clean (bool, optional): clean the mount point. Defaults to True.

    Raises:
        ServerFailed: if there is an error starting the servers

    """
    global SESSIONS  # pylint: disable=global-variable-not-assigned
    try:
        servers = ([
            line.split(' ')[0] for line in genio.read_all_lines(hostfile)
        ])
        server_count = len(servers)

        # Pile of build time variables
        with open("../../.build_vars.json") as json_vars:
            build_vars = json.load(json_vars)

        # Create the DAOS server configuration yaml file to pass
        # with daos_server -o <FILE_NAME>
        print("Creating the server yaml file in {}".format(test.tmp))
        server_yaml = os.path.join(test.tmp, AVOCADO_FILE)
        server_config = DaosServerConfig()
        server_config.get_params(test)
        access_points = ":".join((servers[0], str(server_config.port)))
        server_config.access_points.value = access_points.split()
        server_config.update_log_files(getattr(test, "control_log"),
                                       getattr(test, "helper_log"),
                                       getattr(test, "server_log"))
        server_config.create_yaml(server_yaml)

        # first make sure there are no existing servers running
        print("Removing any existing server processes")
        kill_server(servers)

        # clean the tmpfs on the servers
        if clean:
            print("Cleaning the server tmpfs directories")
            result = pcmd(servers,
                          "find /mnt/daos -mindepth 1 -maxdepth 1 -print0 | "
                          "xargs -0r rm -rf",
                          verbose=False)
            if len(result) > 1 or 0 not in result:
                raise ServerFailed(
                    "Error cleaning tmpfs on servers: {}".format(", ".join(
                        [str(result[key]) for key in result if key != 0])))
        load_mpi('openmpi')
        orterun_bin = find_executable('orterun')
        if orterun_bin is None:
            raise ServerFailed("Can't find orterun")

        server_cmd = [orterun_bin, "--np", str(server_count)]
        server_cmd.extend(["--mca", "btl_openib_warn_default_gid_prefix", "0"])
        server_cmd.extend(["--mca", "btl", "tcp,self"])
        server_cmd.extend(["--mca", "oob", "tcp"])
        server_cmd.extend(["--mca", "pml", "ob1"])
        server_cmd.extend(["--hostfile", hostfile])
        server_cmd.extend(["--enable-recovery", "--tag-output"])

        # Add any user supplied environment
        if env_dict is not None:
            for key, value in env_dict.items():
                os.environ[key] = value
                server_cmd.extend(["-x", "{}={}".format(key, value)])

        # the remote orte needs to know where to find daos, in the
        # case that it's not in the system prefix
        # but it should already be in our PATH, so just pass our
        # PATH along to the remote
        if build_vars["PREFIX"] != "/usr":
            server_cmd.extend(["-x", "PATH"])

        # Run server in insecure mode until Certificate tests are in place
        server_cmd.extend([
            os.path.join(build_vars["PREFIX"], "bin",
                         "daos_server"), "--debug", "--config", server_yaml,
            "start", "-i", "--recreate-superblocks"
        ])

        print("Start CMD>>>>{0}".format(' '.join(server_cmd)))

        resource.setrlimit(resource.RLIMIT_CORE,
                           (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        SESSIONS[setname] = subprocess.Popen(server_cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
        # Switch stdout to non-blocking mode so the polling loop below
        # never hangs inside read().
        fdesc = SESSIONS[setname].stdout.fileno()
        fstat = fcntl.fcntl(fdesc, fcntl.F_GETFL)
        fcntl.fcntl(fdesc, fcntl.F_SETFL, fstat | os.O_NONBLOCK)
        timeout = 600
        start_time = time.time()
        matches = 0
        pattern = "DAOS I/O server.*started"
        expected_data = "Starting Servers\n"
        while True:
            output = ""
            try:
                output = SESSIONS[setname].stdout.read()
            except IOError as excpn:
                if excpn.errno != errno.EAGAIN:
                    raise ServerFailed("Server didn't start: {}".format(excpn))
                # EAGAIN only means "no data yet".  Keep polling, but only
                # until the deadline: skipping the timeout check here would
                # spin forever on a server that stays silent.
                if time.time() - start_time <= timeout:
                    time.sleep(0.1)
                    continue
                # Deadline passed: fall through with empty output so the
                # check below reports the failure.
            match = re.findall(pattern, output)
            expected_data += output
            matches += len(match)
            if not output or matches == server_count or \
               time.time() - start_time > timeout:
                print("<SERVER>: {}".format(expected_data))
                if matches != server_count:
                    raise ServerFailed("Server didn't start!")
                break
        print("<SERVER> server started and took {} seconds to start".format(
            time.time() - start_time))

    except Exception as error:
        print("<SERVER> Exception occurred: {0}".format(str(error)))
        traceback.print_exception(error.__class__, error, sys.exc_info()[2])
        # We need to end the session now -- exit the shell
        try:
            SESSIONS[setname].send_signal(signal.SIGINT)
            time.sleep(5)
            # get the stderr
            error = SESSIONS[setname].stderr.read()
            if SESSIONS[setname].poll() is None:
                SESSIONS[setname].kill()
            retcode = SESSIONS[setname].wait()
            print("<SERVER> server start return code: {}\nstderr:\n{}".format(
                retcode, error))
        except KeyError:
            # No process was registered for this set name, nothing to stop.
            pass
        raise ServerFailed("Server didn't start!")
Esempio n. 26
0
def runServer(hostfile, setname, basepath):
    """
    Launches DAOS servers in accordance with the supplied hostfile.

    The servers are started through orterun inside an aexpect shell session
    which is stored in the module-level ``sessions`` dict under ``setname``.
    The function then reads the session output until one "DAOS server"
    message per expected server has been seen, or until the timeout expires.

    Args:
        hostfile: path of the hostfile; the number of lines it contains is
            used as the number of servers to launch (orterun --np).
        setname: server group name, passed to daos_server with -g and used
            as the key for the session in ``sessions``.
        basepath: directory that holds ``.build_vars.json``; also used to
            build the ``install/tmp/`` path passed to daos_server with -a.

    Raises:
        ServerFailed: if anything goes wrong while launching, or if the
            expected number of servers did not report in before the timeout.
    """
    global sessions
    try:
        server_count = len(genio.read_all_lines(hostfile))

        # pile of build time variables
        with open(os.path.join(basepath, ".build_vars.json")) as json_vars:
            build_vars = json.load(json_vars)
        orterun_bin = os.path.join(build_vars["OMPI_PREFIX"], "bin/orterun")
        daos_srv_bin = os.path.join(build_vars["PREFIX"], "bin/daos_server")
        ld_lib_path = os.path.join(build_vars["PREFIX"], "lib") + os.pathsep + \
                      os.path.join(build_vars["PREFIX"], "lib/daos_srv")

        # Name patterns of the environment variables that are forwarded to
        # the servers with orterun's -x option.
        env_vars = ['CRT_.*', 'DAOS_.*', 'ABT_.*', 'DD_(STDERR|LOG)', 'D_LOG_.*',
                    'OFI_.*']

        env_args = ""
        # Unpack name and value: items() yields (name, value) tuples, and
        # indexing os.environ with the whole tuple raises KeyError.
        for name, value in os.environ.items():
            # any() forwards a variable once even if several patterns match
            if any(re.match(pat, name) for pat in env_vars):
                env_args += "-x {0}=\"{1}\" ".format(name, value)

        initial_cmd = "/bin/sh"
        server_cmd = orterun_bin + " --np {0} ".format(server_count)
        server_cmd += "--hostfile {0} --enable-recovery ".format(hostfile)
        server_cmd += env_args
        server_cmd += "-x DD_SUBSYS=all -x DD_MASK=all "
        server_cmd += "-x LD_LIBRARY_PATH={0} ".format(ld_lib_path)
        server_cmd += daos_srv_bin + " -g {0} -c 1 ".format(setname)
        server_cmd += " -a" + basepath + "/install/tmp/"

        print("Start CMD>>>>{0}".format(server_cmd))

        sessions[setname] = aexpect.ShellSession(initial_cmd)
        # NOTE(review): when the shell is not responsive the function returns
        # without launching anything and without raising -- confirm that
        # callers expect this.
        if sessions[setname].is_responsive():
            sessions[setname].sendline(server_cmd)
            timeout = 300
            start_time = time.time()
            result = 0
            pattern = "DAOS server"
            expected_data = "Starting Servers\n"
            while True:
                output = sessions[setname].read_nonblocking(2, 2)
                match = re.findall(pattern, output)
                expected_data = expected_data + output
                # one occurrence of the pattern is counted per started server
                result += len(match)
                if result == server_count or time.time() - start_time > timeout:
                    print("<SERVER>: {}".format(expected_data))
                    if result != server_count:
                        raise ServerFailed("Server didn't start!")
                    break
            print("<SERVER> server started and took %s seconds to start" %
                  (time.time() - start_time))
    except Exception as e:
        # Any failure, including the ServerFailed raised above, is reported
        # to the caller as ServerFailed.
        print("<SERVER> Exception occurred: {0}".format(str(e)))
        raise ServerFailed("Server didn't start!")
Esempio n. 27
0
def run_server(hostfile, setname, basepath, uri_path=None, env_dict=None):
    """
    Launches DAOS servers in accordance with the supplied hostfile.

    Before launching, the server yaml configuration is created, servers
    already running on the listed hosts are killed and the contents of
    /mnt/daos on each host are removed over ssh. The servers are then started
    with orterun as a subprocess that is stored in the module-level
    ``SESSIONS`` dict under ``setname``, and its stdout is polled until one
    "DAOS I/O server" message per expected server has been seen.

    Args:
        hostfile: path of the hostfile; the first space-separated token of
            each line is taken as a server host name.
        setname: key under which the server process is stored in SESSIONS.
        basepath: directory that holds ``.build_vars.json`` and the server
            yaml file; also used to build the ``install/tmp`` path passed to
            daos_server with -a.
        uri_path: optional path passed to orterun with --report-uri.
        env_dict: optional dict of environment variables; each entry is set
            in os.environ of this process and forwarded to the servers with
            orterun's -x option.

    Raises:
        ServerFailed: if anything goes wrong while launching, if the server
            output ends early, or if the expected number of servers did not
            report in before the timeout.
    """
    global SESSIONS
    try:
        servers = (
            [line.split(' ')[0] for line in genio.read_all_lines(hostfile)])
        server_count = len(servers)

        # Create the DAOS server configuration yaml file to pass
        # with daos_server -o <FILE_NAME>
        create_server_yaml(basepath)

        # first make sure there are no existing servers running
        kill_server(servers)

        # clean the tmpfs on the servers
        for server in servers:
            subprocess.check_call(['ssh', server,
                                   ("find /mnt/daos -mindepth 1 -maxdepth 1 "
                                    "-print0 | xargs -0r rm -rf")])

        # pile of build time variables
        with open(os.path.join(basepath, ".build_vars.json")) as json_vars:
            build_vars = json.load(json_vars)
        orterun_bin = os.path.join(build_vars["OMPI_PREFIX"], "bin", "orterun")
        daos_srv_bin = os.path.join(build_vars["PREFIX"], "bin", "daos_server")

        env_args = []
        # Add any user supplied environment
        if env_dict is not None:
            for key, value in env_dict.items():
                os.environ[key] = value
                env_args.extend(["-x", "{}={}".format(key, value)])

        server_cmd = [orterun_bin, "--np", str(server_count)]
        if uri_path is not None:
            server_cmd.extend(["--report-uri", uri_path])
        server_cmd.extend(["--hostfile", hostfile, "--enable-recovery"])
        server_cmd.extend(env_args)
        server_cmd.extend([daos_srv_bin,
                           "-a", os.path.join(basepath, "install", "tmp"),
                           "-o", '{}/{}'.format(basepath, AVOCADO_FILE)])

        print("Start CMD>>>>{0}".format(' '.join(server_cmd)))

        # raise the core file size limit to unlimited before launching
        resource.setrlimit(
            resource.RLIMIT_CORE,
            (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

        SESSIONS[setname] = subprocess.Popen(server_cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
        # switch the stdout pipe to non-blocking mode so the loop below can
        # poll it without hanging in read()
        fdesc = SESSIONS[setname].stdout.fileno()
        fstat = fcntl.fcntl(fdesc, fcntl.F_GETFL)
        fcntl.fcntl(fdesc, fcntl.F_SETFL, fstat | os.O_NONBLOCK)
        timeout = 600
        start_time = time.time()
        result = 0
        pattern = "DAOS I/O server"
        expected_data = "Starting Servers\n"
        while True:
            end_of_output = False
            try:
                output = SESSIONS[setname].stdout.read()
                # a read that succeeds but returns nothing means the output
                # has ended
                end_of_output = not output
            except IOError as excpn:
                if excpn.errno != errno.EAGAIN:
                    raise
                # Nothing to read yet. Pause briefly instead of spinning and
                # fall through so that the timeout is still enforced when the
                # servers never print anything.
                output = ""
                time.sleep(0.1)
            match = re.findall(pattern, output)
            expected_data += output
            # one occurrence of the pattern is counted per started server
            result += len(match)
            if end_of_output or result == server_count or \
               time.time() - start_time > timeout:
                print("<SERVER>: {}".format(expected_data))
                if result != server_count:
                    raise ServerFailed("Server didn't start!")
                break
        print("<SERVER> server started and took %s seconds to start" %
              (time.time() - start_time))
    except Exception as error:
        print("<SERVER> Exception occurred: {0}".format(str(error)))
        # use the caught exception's own class; the loop-local IOError name
        # is not bound unless a read actually failed
        traceback.print_exception(error.__class__, error, sys.exc_info()[2])
        # we need to end the session now -- exit the shell
        try:
            SESSIONS[setname].send_signal(signal.SIGINT)
            time.sleep(5)
            # get the stderr
            stderr_output = SESSIONS[setname].stderr.read()
            if SESSIONS[setname].poll() is None:
                SESSIONS[setname].kill()
            retcode = SESSIONS[setname].wait()
            print("<SERVER> server start return code: {}\n"
                  "stderr:\n{}".format(retcode, stderr_output))
        except KeyError:
            # no process was stored for this setname, nothing to clean up
            pass
        raise ServerFailed("Server didn't start!")