Beispiel #1
0
 def get_system_info(self, hostname=None):
     """
     Collect RAM and disk statistics of a host and store them as
     attributes on this object.

     On success the following attributes are set:

     - ``system_total_ram``: ``MemTotal`` of ``/proc/meminfo``
     - ``system_ram``: ``MemAvailable`` when the kernel reports it,
       otherwise ``MemFree`` + ``Buffers`` + ``Cached``
     - ``system_disk``: the "Available" column of ``df -k PBS_HOME``
     - ``system_disk_used_percent``: the "Use%" column of the same
       ``df`` output, without the percent sign

     Sizes are the reported kilobyte values divided by 2**20, i.e. in gb.
     When a command fails or its output cannot be parsed an error is
     logged and the corresponding attributes are left unset.

     :param hostname: host to query, passed through to ``DshUtils``
     :type hostname: str or None
     """
     du = DshUtils()
     kb_per_gb = 2 ** 20

     def _to_gb(meminfo_line):
         # the value is the second whitespace separated field of the line
         return float(meminfo_line.split()[1]) / kb_per_gb

     # getting RAM size in gb
     mem_info = du.cat(hostname, "/proc/meminfo")
     if mem_info['rc'] != 0:
         _msg = 'failed to get content of /proc/meminfo of host: '
         # hostname defaults to None, so it must be stringified first
         self.logger.error(_msg + str(hostname))
     else:
         mem_available = None
         # pieces of the fallback estimate, filled in only when seen
         fallback = {}
         for line in mem_info['out']:
             if "MemTotal" in line:
                 self.system_total_ram = _to_gb(line)
             elif "MemAvailable" in line:
                 mem_available = _to_gb(line)
                 # best figure available, no need to read any further
                 break
             elif "MemFree" in line:
                 fallback['free'] = _to_gb(line)
             elif "Buffers" in line:
                 fallback['buffers'] = _to_gb(line)
             elif line.startswith("Cached"):
                 # startswith() keeps "SwapCached" out of the estimate
                 fallback['cached'] = _to_gb(line)
         if mem_available is not None:
             self.system_ram = mem_available
         elif fallback:
             # a line missing from the output counts as zero instead of
             # aborting with an unbound local variable
             self.system_ram = (fallback.get('free', 0.0) +
                                fallback.get('buffers', 0.0) +
                                fallback.get('cached', 0.0))
         else:
             _msg = 'failed to parse memory details from /proc/meminfo'
             _msg += ' of host: '
             self.logger.error(_msg + str(hostname))
     # getting disk size in gb
     pbs_conf = du.parse_pbs_config(hostname)
     pbs_home_info = du.run_cmd(hostname,
                                cmd=['df', '-k', pbs_conf['PBS_HOME']])
     if pbs_home_info['rc'] != 0:
         _msg = 'failed to get output of df -k command of host: '
         self.logger.error(_msg + str(hostname))
     else:
         # df may print a long device name on a line of its own, so all
         # lines after the header are joined before splitting into columns
         disk_size = ' '.join(pbs_home_info['out'][1:]).split()
         if len(disk_size) < 5:
             _msg = 'failed to parse output of df -k command of host: '
             self.logger.error(_msg + str(hostname))
         else:
             self.system_disk = float(disk_size[3]) / kb_per_gb
             self.system_disk_used_percent = float(
                 disk_size[4].rstrip('%'))
Beispiel #2
0
 def get_system_info(self, hostname=None):
     """
     Collect RAM and disk statistics of a host and store them as
     attributes on this object.

     On success the following attributes are set:

     - ``system_ram``: ``MemAvailable`` of ``/proc/meminfo`` when the
       kernel reports it, otherwise ``MemFree`` + ``Buffers`` + ``Cached``
     - ``system_disk``: the "Available" column of ``df -k PBS_HOME``

     Sizes are the reported kilobyte values divided by 2**20, i.e. in gb.
     When a command fails or its output cannot be parsed an error is
     logged and the corresponding attribute is left unset.

     :param hostname: host to query, passed through to ``DshUtils``
     :type hostname: str or None
     """
     du = DshUtils()
     kb_per_gb = 2 ** 20

     def _to_gb(meminfo_line):
         # the value is the second whitespace separated field of the line
         return float(meminfo_line.split()[1]) / kb_per_gb

     # getting RAM size in gb
     mem_info = du.cat(hostname, "/proc/meminfo")
     if mem_info['rc'] != 0:
         _msg = 'failed to get content of /proc/meminfo of host: '
         # hostname defaults to None, so it must be stringified first
         self.logger.error(_msg + str(hostname))
     else:
         mem_available = None
         # pieces of the fallback estimate, filled in only when seen
         fallback = {}
         for line in mem_info['out']:
             if "MemAvailable" in line:
                 mem_available = _to_gb(line)
                 break
             elif "MemFree" in line:
                 fallback['free'] = _to_gb(line)
             elif "Buffers" in line:
                 fallback['buffers'] = _to_gb(line)
             elif line.startswith("Cached"):
                 # startswith() keeps "SwapCached" out of the estimate
                 fallback['cached'] = _to_gb(line)
         if mem_available is not None:
             self.system_ram = mem_available
         elif fallback:
             # kernels without MemAvailable: estimate from the classic
             # free + buffers + cached figures
             self.system_ram = (fallback.get('free', 0.0) +
                                fallback.get('buffers', 0.0) +
                                fallback.get('cached', 0.0))
         else:
             _msg = 'failed to parse memory details from /proc/meminfo'
             _msg += ' of host: '
             self.logger.error(_msg + str(hostname))
     # getting disk size in gb
     pbs_conf = du.parse_pbs_config(hostname)
     pbs_home_info = du.run_cmd(hostname,
                                cmd=['df', '-k', pbs_conf['PBS_HOME']])
     if pbs_home_info['rc'] != 0:
         _msg = 'failed to get output of df -k command of host: '
         self.logger.error(_msg + str(hostname))
     else:
         # df may print a long device name on a line of its own, so all
         # lines after the header are joined before splitting into columns
         disk_size = ' '.join(pbs_home_info['out'][1:]).split()
         if len(disk_size) < 4:
             _msg = 'failed to parse output of df -k command of host: '
             self.logger.error(_msg + str(hostname))
         else:
             self.system_disk = float(disk_size[3]) / kb_per_gb
Beispiel #3
0
    def check_hardware_status_and_core_files(self, test):
        """
        function checks hardware status and core files
        every 5 minutes

        The hosts listed under ``servers``, ``moms`` and ``comms`` in
        ``self.param_dict`` are examined, except those whose machine object
        in the running test reports a platform other than ``linux``,
        ``shasta`` or ``cray``. The method re-arms itself through
        ``self.hardware_report_timer`` before doing the checks.

        :param test: object exposing the running test either as ``.test``
                     or as ``.context``; when it has neither attribute
                     nothing is checked and no timer is started
        :raises SkipTest: when disk information of a host is unavailable
                          or its disk usage is 95% or more; the timer is
                          cancelled first
        """
        du = DshUtils()
        systems = list(self.param_dict['servers'])
        systems.extend(self.param_dict['moms'])
        systems.extend(self.param_dict['comms'])
        systems = list(set(systems))

        if hasattr(test, 'test'):
            _test = test.test
        elif hasattr(test, 'context'):
            _test = test.context
        else:
            return None

        # drop hosts whose platform is not covered by these checks
        for name in ['servers', 'moms', 'comms', 'clients']:
            machines = getattr(_test, name, None)
            if not machines:
                continue
            for mc in machines.values():
                if ((mc.platform not in ['linux', 'shasta', 'cray'])
                        and (mc.hostname in systems)):
                    systems.remove(mc.hostname)

        # schedule the next run 300 seconds from now
        self.hardware_report_timer = Timer(
            300, self.check_hardware_status_and_core_files, args=(test, ))
        self.hardware_report_timer.start()

        for hostname in systems:
            hr = SystemInfo()
            hr.get_system_info(hostname)
            # monitors disk
            used_disk_percent = getattr(hr, 'system_disk_used_percent', None)
            if used_disk_percent is None:
                _msg = hostname
                _msg += ": unable to get disk info"
                self.hardware_report_timer.cancel()
                raise SkipTest(_msg)
            elif 70 <= used_disk_percent < 95:
                _msg = hostname + ": disk usage is at "
                _msg += str(used_disk_percent) + "%"
                _msg += ", disk cleanup is recommended."
                self.logger.warning(_msg)
            elif used_disk_percent >= 95:
                _msg = hostname + ":disk usage > 95%, skipping the test(s)"
                self.hardware_report_timer.cancel()
                raise SkipTest(_msg)
            # checks for core files
            pbs_conf = du.parse_pbs_config(hostname)
            for priv_dir in ("mom_priv", "server_priv", "sched_priv"):
                priv_path = os.path.join(pbs_conf["PBS_HOME"], priv_dir)
                if not du.isdir(hostname=hostname, path=priv_path):
                    continue
                priv_files = du.listdir(hostname=hostname,
                                        path=priv_path,
                                        sudo=True,
                                        fullpath=False)
                # same guard as for the user home directories below: a
                # falsy listing must not be handed to fnmatch.filter()
                if priv_files and fnmatch.filter(priv_files, "core*"):
                    _msg = hostname + ": core files found in "
                    _msg += priv_path
                    self.logger.warning(_msg)
            for u in PBS_ALL_USERS:
                user_home_files = du.listdir(hostname=hostname,
                                             path=u.home,
                                             sudo=True,
                                             fullpath=False,
                                             runas=u.name)
                if user_home_files and fnmatch.filter(user_home_files,
                                                      "core*"):
                    _msg = hostname + ": user-" + str(u)
                    _msg += ": core files found in "
                    self.logger.warning(_msg + u.home)
Beispiel #4
0
class Job(ResourceResv):

    """
    PBS Job. Attributes and Resources

    :param username: Job username
    :type username: str or None
    :param attrs: Job attributes. On Cray platforms the passed dictionary
                  is updated in place with the ``vntype`` adjustments.
    :type attrs: Dictionary or None
    :param jobname: Name of the PBS job
    :type jobname: str or None
    """

    dflt_attributes = {
        ATTR_N: 'STDIN',
        ATTR_j: 'n',
        ATTR_m: 'a',
        ATTR_p: '0',
        ATTR_r: 'y',
        ATTR_k: 'oe',
    }
    runtime = 100
    du = DshUtils()

    def __init__(self, username=TEST_USER, attrs=None, jobname=None):
        # a fresh dictionary per call: a shared default would accumulate
        # the Cray specific entries written below
        if attrs is None:
            attrs = {}
        # resolved through the class level ``du`` because the instance
        # attribute is only set (to None) further down
        self.platform = self.du.get_platform()
        self.server = {}
        self.script = None
        self.script_body = None
        if username is not None:
            self.username = str(username)
        else:
            self.username = None
        # created lazily by create_script() / create_eatcpu_job()
        self.du = None
        self.interactive_handle = None
        if self.platform in ('cray', 'craysim'):
            if 'Resource_List.select' in attrs:
                select = attrs['Resource_List.select']
                attrs['Resource_List.select'] = self.add_cray_vntype(select)
            elif 'Resource_List.vntype' not in attrs:
                attrs['Resource_List.vntype'] = 'cray_compute'

        PBSObject.__init__(self, None, attrs, self.dflt_attributes)

        if jobname is not None:
            self.custom_attrs[ATTR_N] = jobname
            self.attributes[ATTR_N] = jobname
        self.set_variable_list(self.username)
        self.set_sleep_time(100)

    def __del__(self):
        del self.__dict__

    def add_cray_vntype(self, select=None):
        """
        Cray specific function to add vntype as ``cray_compute`` to each
        select chunk

        A chunk that already names a ``vntype``, ``host`` or ``vnode``
        resource is left untouched.

        :param select: PBS select statement
        :type select: str or None
        :returns: the select statement with ``vntype`` added where needed,
                  or None when ``select`` is None
        """
        if select is None:
            return None
        chunks = []
        for chunk in select.split('+'):
            resources = PbsTypeSelect(chunk).resources
            placed = ('vntype' in resources or 'host' in resources or
                      'vnode' in resources)
            if not placed:
                chunk = chunk + ":vntype=cray_compute"
            chunks.append(chunk)
        return "+".join(chunks)

    def set_attributes(self, a=None):
        """
        set attributes and custom attributes on this job.
        custom attributes are used when converting attributes to CLI.
        In case of Cray platform if 'Resource_List.vntype' is set
        already then remove it and add vntype value to each chunk of a
        select statement.

        :param a: Attribute dictionary, or a list of (name, value) pairs.
                  On Cray platforms a passed dictionary may get its
                  ``Resource_List.select`` entry rewritten in place.
        :type a: Dictionary or list or None
        """
        if a is None:
            a = {}
        elif isinstance(a, list):
            a = OrderedDict(a)

        # later sources win: defaults < current attributes < ``a``
        self.attributes = OrderedDict(list(self.dflt_attributes.items()) +
                                      list(self.attributes.items()) +
                                      list(a.items()))

        if self.platform in ('cray', 'craysim'):
            has_select = 'Resource_List.select' in a
            has_vntype = 'Resource_List.vntype' in self.custom_attrs
            if has_select and has_vntype:
                del self.custom_attrs['Resource_List.vntype']
                select = a['Resource_List.select']
                # ``attributes`` was merged above, so only ``custom_attrs``
                # receives the rewritten select statement
                a['Resource_List.select'] = self.add_cray_vntype(select)

        self.custom_attrs = OrderedDict(list(self.custom_attrs.items()) +
                                        list(a.items()))

    def set_variable_list(self, user=None, workdir=None):
        """
        Customize the ``Variable_List`` job attribute to ``<user>``

        :param user: user the variables are built for, defaults to the
                     user running this process
        :type user: str or None
        :param workdir: value of ``PBS_O_WORKDIR``, defaults to the
                        current working directory
        :type workdir: str or None
        """
        if user is None:
            userinfo = pwd.getpwuid(os.getuid())
            user = userinfo[0]
            homedir = userinfo[5]
        else:
            try:
                homedir = pwd.getpwnam(user)[5]
            except Exception:
                # best effort: a user without a local passwd entry gets
                # an empty home directory
                homedir = ""

        self.username = user

        if workdir is not None:
            wd = workdir
        else:
            wd = os.getcwd()

        s = [
            'PBS_O_HOME=' + homedir,
            'PBS_O_LANG=en_US.UTF-8',
            'PBS_O_LOGNAME=' + user,
            'PBS_O_PATH=/usr/bin:/bin:/usr/bin:/usr/local/bin',
            'PBS_O_MAIL=/var/spool/mail/' + user,
            'PBS_O_SHELL=/bin/bash',
            'PBS_O_SYSTEM=Linux',
            'PBS_O_WORKDIR=' + str(wd),
        ]

        self.attributes[ATTR_v] = ",".join(s)
        self.set_attributes()

    def set_sleep_time(self, duration):
        """
        Set the sleep duration for this job.

        :param duration: The duration, in seconds, to sleep
        :type duration: int
        """
        self.set_execargs('/bin/sleep', duration)

    def set_execargs(self, executable, arguments=None):
        """
        Set the executable and arguments to use for this job

        :param executable: path to an executable. No checks are made.
        :type executable: str
        :param arguments: arguments to executable. Each element of a list
                          or tuple becomes one argument; any other value
                          becomes a single argument. None removes the
                          argument list.
        :type arguments: str or list or int
        """
        msg = ['job: executable set to ' + str(executable)]
        if arguments is not None:
            msg += [' with arguments: ' + str(arguments)]

        self.logger.info("".join(msg))
        self.attributes[ATTR_executable] = executable
        if arguments is not None:
            xml_beginargs = '<jsdl-hpcpa:Argument>'
            xml_endargs = '</jsdl-hpcpa:Argument>'
            if isinstance(arguments, (list, tuple)):
                values = arguments
            else:
                # str, int and any other scalar form a single argument
                # instead of silently producing an empty argument list
                values = [arguments]
            self.attributes[ATTR_Arglist] = "".join(
                xml_beginargs + str(v) + xml_endargs for v in values)
        else:
            self.unset_attributes([ATTR_Arglist])
        self.set_attributes()

    def create_script(self, body=None, asuser=None, hostname=None):
        """
        Create a job script from a given body of text into a
        temporary location

        :param body: the body of the script
        :type body: str or list or None
        :param asuser: Optionally the user to own this script,
                      defaults to current user
        :type asuser: str or None
        :param hostname: The host on which the job script is to
                         be created
        :type hostname: str or None
        :returns: path of the created script, or None when ``body``
                  is None
        """

        if body is None:
            return None

        if isinstance(body, list):
            body = '\n'.join(body)

        if self.platform in ('cray', 'craysim'):
            lines = body.split("\n")
            for i, line in enumerate(lines):
                if not (line.startswith("#PBS") and "select=" in line):
                    continue
                # the directive carries its own select statement, so the
                # job wide vntype is dropped in favour of per chunk ones
                if 'Resource_List.vntype' in self.attributes:
                    self.unset_attributes(['Resource_List.vntype'])
                words = line.split(" ")
                for j, word in enumerate(words):
                    for prefix in ("-lselect=", "select="):
                        if word.startswith(prefix):
                            sel_str = word[len(prefix):]
                            sel_str = self.add_cray_vntype(select=sel_str)
                            words[j] = prefix + sel_str
                            break
                lines[i] = " ".join(words)
            body = '\n'.join(lines)

        # If the user has a userhost, the job will run from there
        # so the script should be made there
        if self.username:
            user = PbsUser.get_user(self.username)
            # NOTE(review): guards against get_user() finding no user;
            # confirm what it returns for unknown names
            if user is not None and user.host:
                hostname = user.host
                asuser = user.name

        self.script_body = body
        if self.du is None:
            self.du = DshUtils()
        # First create the temporary file as current user and only change
        # its mode once the current user has written to it
        fn = self.du.create_temp_file(hostname, prefix='PtlPbsJobScript',
                                      asuser=asuser, body=body)
        self.du.chmod(hostname, fn, mode=0o755)
        self.script = fn
        return fn

    def create_subjob_id(self, job_array_id, subjob_index):
        """
        insert subjob index into the square brackets of job array id

        :param job_array_id: PBS parent array job id
        :type job_array_id: str
        :param subjob_index: index of subjob
        :type subjob_index: int
        :returns: subjob id string
        :raises ValueError: when ``job_array_id`` contains no ``[]``
        """
        idx = job_array_id.find('[]')
        if idx < 0:
            # without the brackets the index would end up in front of
            # the id and yield a bogus job id
            raise ValueError("not a job array id: %s" % str(job_array_id))
        return job_array_id[:idx + 1] + str(subjob_index) + \
            job_array_id[idx + 1:]

    def create_eatcpu_job(self, duration=None, hostname=None):
        """
        Create a job that eats cpu indefinitely or for the given
        duration of time

        :param duration: The duration, in seconds, to run; None lets the
                         script run indefinitely
        :type duration: int
        :param hostname: hostname on which to execute the job
        :type hostname: str or None
        :returns: path of the generated script
        :raises AssertionError: when the script cannot be made executable
        """
        if self.du is None:
            self.du = DshUtils()
        shebang_line = '#!' + self.du.which(hostname, exe='python3')
        body = """
import signal
import sys

x = 0


def receive_alarm(signum, stack):
    sys.exit()

signal.signal(signal.SIGALRM, receive_alarm)

if (len(sys.argv) > 1):
    input_time = sys.argv[1]
    print('Terminating after %s seconds' % input_time)
    signal.alarm(int(input_time))
else:
    print('Running indefinitely')

while True:
    x += 1
"""
        script_body = shebang_line + body
        script_path = self.du.create_temp_file(hostname=hostname,
                                               body=script_body,
                                               suffix='.py')
        pbs_conf = self.du.parse_pbs_config(hostname)
        shell_path = os.path.join(pbs_conf['PBS_EXEC'],
                                  'bin', 'pbs_python')
        a = {ATTR_S: shell_path}
        self.set_attributes(a)
        mode = 0o755
        if not self.du.chmod(hostname=hostname, path=script_path, mode=mode,
                             sudo=True):
            raise AssertionError("Failed to set permissions for file %s"
                                 " to %s" % (script_path, oct(mode)))
        self.set_execargs(script_path, duration)
        return script_path
Beispiel #5
0
 def check_hardware_status_and_core_files(self):
     """
     function checks hardware status and core files
     every 5 minutes

     The hosts listed under ``servers``, ``moms`` and ``comms`` in
     ``self.param_dict`` are examined. The method re-arms itself through
     ``self.hardware_report_timer`` before doing the checks.

     :raises SkipTest: when disk information of a host is unavailable
                       or its disk usage is 95% or more; the timer is
                       cancelled first
     """
     du = DshUtils()
     # schedule the next run 300 seconds from now
     self.hardware_report_timer = Timer(
         300, self.check_hardware_status_and_core_files)
     self.hardware_report_timer.start()
     systems = list(self.param_dict['servers'])
     systems.extend(self.param_dict['moms'])
     systems.extend(self.param_dict['comms'])
     systems = list(set(systems))
     for hostname in systems:
         hr = SystemInfo()
         hr.get_system_info(hostname)
         # monitors disk
         used_disk_percent = getattr(hr,
                                     'system_disk_used_percent', None)
         if used_disk_percent is None:
             _msg = hostname
             _msg += ": unable to get disk info"
             self.hardware_report_timer.cancel()
             raise SkipTest(_msg)
         elif 70 <= used_disk_percent < 95:
             _msg = hostname + ": disk usage is at "
             _msg += str(used_disk_percent) + "%"
             _msg += ", disk cleanup is recommended."
             self.logger.warning(_msg)
         elif used_disk_percent >= 95:
             _msg = hostname + ":disk usage > 95%, skipping the test(s)"
             self.hardware_report_timer.cancel()
             raise SkipTest(_msg)
         # checks for core files
         pbs_conf = du.parse_pbs_config(hostname)
         for priv_dir in ("mom_priv", "server_priv", "sched_priv"):
             priv_path = os.path.join(pbs_conf["PBS_HOME"], priv_dir)
             if not du.isdir(hostname=hostname, path=priv_path):
                 continue
             priv_files = du.listdir(
                 hostname=hostname,
                 path=priv_path,
                 sudo=True,
                 fullpath=False)
             # same guard as for the user home directories below: a
             # falsy listing must not be handed to fnmatch.filter()
             if priv_files and fnmatch.filter(priv_files, "core*"):
                 _msg = hostname + ": core files found in "
                 _msg += priv_path
                 self.logger.warning(_msg)
         for u in PBS_ALL_USERS:
             user_home_files = du.listdir(hostname=hostname, path=u.home,
                                          sudo=True, fullpath=False,
                                          runas=u.name)
             if user_home_files and fnmatch.filter(
                     user_home_files, "core*"):
                 _msg = hostname + ": user-" + str(u)
                 _msg += ": core files found in "
                 self.logger.warning(_msg + u.home)