Example #1
class PTLTestData(Plugin):
    """
    Save post analysis data on test case failure or error
    """
    name = 'PTLTestData'
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide a different directory'
            self.logger.warning(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host,
                      path=datadir,
                      mode=0o755,
                      parents=True,
                      logerr=False,
                      level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0)
                and (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite has exceeded the max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return

        servers = getattr(_test, 'servers', None)
        if servers is not None:
            server_host = servers.values()[0].shortname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        moms = getattr(_test, 'moms', None)
        comms = getattr(_test, 'comms', None)
        client = getattr(_test.servers.values()[0], 'client', None)
        server = servers.values()[0]
        add_hosts = []
        if len(servers) > 1:
            for param in servers.values()[1:]:
                add_hosts.append(param.shortname)
        if moms is not None:
            for param in moms.values():
                add_hosts.append(param.shortname)
        if comms is not None:
            for param in comms.values():
                add_hosts.append(param.shortname)
        if client is not None:
            add_hosts.append(client.split('.')[0])

        add_hosts = list(set(add_hosts) - set([server_host]))

        pbs_snapshot_path = os.path.join(server.pbs_conf["PBS_EXEC"], "sbin",
                                         "pbs_snapshot")
        cur_user = self.du.get_current_user()
        cur_user_dir = pwd.getpwnam(cur_user).pw_dir
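        # Build the pbs_snapshot command: collect daemon and accounting logs
        # from the server host (with sudo) and write the snapshot tarball
        # into the invoking user's home directory.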
        cmd = [
            pbs_snapshot_path, '-H', server_host, '--daemon-logs', '2',
            '--accounting-logs', '2', '--with-sudo'
        ]
        if len(add_hosts) > 0:
            cmd += ['--additional-hosts=' + ','.join(add_hosts)]
        cmd += ['-o', cur_user_dir]
        ret = self.du.run_cmd(current_host,
                              cmd,
                              level=logging.DEBUG2,
                              logerr=False)
        if ret['rc'] != 0:
            _msg = 'Failed to get analysis information '
            _msg += 'on %s:' % server_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            if len(ret['out']) == 0:
                self.logger.error('Snapshot command failed')
                f.close()
                return

        snap_out = ret['out'][0]
        snap_out_dest = (snap_out.split(":")[1]).strip()

        dest = os.path.join(datadir, 'PBS_' + server_host + '.tar.gz')
        ret = self.du.run_copy(current_host,
                               snap_out_dest,
                               dest,
                               sudo=True,
                               level=logging.DEBUG2)
        self.du.rm(current_host,
                   path=snap_out_dest,
                   recursive=True,
                   force=True,
                   level=logging.DEBUG2)

        f.close()
        self.__save_data_count += 1
        _msg = 'Saved post analysis data'
        self.logger.info(_msg)

    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')
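
A minimal wiring sketch (not part of the original example; the directory path
and threshold below are illustrative): the plugin enables itself only when a
post-analysis directory has been set before configure() runs.

plugin = PTLTestData()
plugin.set_data(post_data_dir='/tmp/ptl_post_data', max_postdata_threshold=10)
plugin.configure(options=None, config=None)  # enabled because post_data_dir is set
# The test runner then invokes addSuccess/addFailure/addError per test result.
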
class PTLTestData(Plugin):

    """
    Save post analysis data on test case failure or error
    """
    name = 'PTLTestData'
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide a different directory'
            self.logger.warning(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0o755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0) and
                (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite has exceeded the max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        svr = getattr(_test, 'server', None)
        if svr is not None:
            svr_host = svr.hostname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'],
                                'unsupported', 'pbs_diag')
        cur_user = self.du.get_current_user()
        cmd = [pbs_diag, '-f', '-d', '2']
        cmd += ['-u', cur_user]
        cmd += ['-o', pwd.getpwnam(cur_user).pw_dir]
        if len(svr.jobs) > 0:
            cmd += ['-j', ','.join(svr.jobs.keys())]
        ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to get diag information '
            _msg += 'on %s:' % svr_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in below output:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                f.close()
                return
        diag_out_dest = os.path.join(datadir, os.path.basename(diag_out))
        if not self.du.is_localhost(svr_host):
            diag_out_r = svr_host + ':' + diag_out
        else:
            diag_out_r = diag_out
        ret = self.du.run_copy(current_host, diag_out_r, diag_out_dest,
                               sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to copy generated diag from'
            _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            self.du.rm(svr_host, path=diag_out, sudo=True, force=True,
                       level=logging.DEBUG2)
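        # Scan the main PBS daemon directories for core files; any found are
        # analyzed with 'pbs_diag -g' and folded back into the diag tarball
        # below.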
        cores = []
        dir_list = ['server_priv', 'sched_priv', 'mom_priv']
        for d in dir_list:
            path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
            files = self.du.listdir(hostname=svr_host, path=path, sudo=True,
                                    level=logging.DEBUG2)
            for _f in files:
                if os.path.basename(_f).startswith('core'):
                    cores.append(_f)
        cores = list(set(cores))
        if len(cores) > 0:
            cmd = ['gunzip', diag_out_dest]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to unzip generated diag at %s:' % diag_out_dest
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            diag_out_dest = diag_out_dest.rstrip('.gz')
            cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to extract generated diag %s' % diag_out_dest
                _msg += ' to %s:' % datadir
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(hostname=current_host, path=diag_out_dest,
                       force=True, sudo=True, level=logging.DEBUG2)
            diag_out_dest = diag_out_dest.rstrip('.tar')
            for c in cores:
                cmd = [pbs_diag, '-g', c]
                ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed to get core file information for '
                    _msg += '%s on %s:' % (c, svr_host)
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                else:
                    of = os.path.join(diag_out_dest,
                                      os.path.basename(c) + '.out')
                    _f = open(of, 'w+')
                    _f.write('\n'.join(ret['out']) + '\n')
                    _f.close()
                    self.du.rm(hostname=svr_host, path=c, force=True,
                               sudo=True, level=logging.DEBUG2)
            cmd = ['tar', '-cf', diag_out_dest + '.tar']
            cmd += [os.path.basename(diag_out_dest)]
            ret = self.du.run_cmd(current_host, cmd, sudo=True, cwd=datadir,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to generate tarball of diag directory'
                _msg += ' %s' % diag_out_dest
                _msg += ' after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            cmd = ['gzip', diag_out_dest + '.tar']
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to compress tarball of diag %s' % diag_out_dest
                _msg += '.tar after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(current_host, diag_out_dest, sudo=True,
                       recursive=True, force=True, level=logging.DEBUG2)
        else:
            diag_out_dest = diag_out_dest.rstrip('.tar.gz')
        dest = os.path.join(datadir,
                            'PBS_' + current_host.split('.')[0] + '.tar.gz')
        ret = self.du.run_copy(current_host, diag_out_dest + '.tar.gz',
                               dest, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to rename tarball of diag from %s' % diag_out_dest
            _msg += '.tar.gz to %s:' % dest
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        self.du.rm(current_host, path=diag_out_dest + '.tar.gz',
                   force=True, sudo=True, level=logging.DEBUG2)
        f.close()
        self.__save_data_count += 1
        _msg = 'Successfully saved post analysis data'
        self.logger.log(logging.DEBUG2, _msg)

    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')
class Job(ResourceResv):
    """
    PBS Job. Attributes and Resources

    :param username: Job username
    :type username: str or None
    :param attrs: Job attributes
    :type attrs: Dictionary
    :param jobname: Name of the PBS job
    :type jobname: str or None
    """

    dflt_attributes = {
        ATTR_N: 'STDIN',
        ATTR_j: 'n',
        ATTR_m: 'a',
        ATTR_p: '0',
        ATTR_r: 'y',
        ATTR_k: 'oe',
    }
    runtime = 100
    du = DshUtils()

    def __init__(self, username=TEST_USER, attrs={}, jobname=None):
        self.platform = self.du.get_platform()
        self.server = {}
        self.script = None
        self.script_body = None
        if username is not None:
            self.username = str(username)
        else:
            self.username = None
        self.du = None
        self.interactive_handle = None
        if self.platform == 'cray' or self.platform == 'craysim':
            if 'Resource_List.select' in attrs:
                select = attrs['Resource_List.select']
                attrs['Resource_List.select'] = self.add_cray_vntype(select)
            elif 'Resource_List.vntype' not in attrs:
                attrs['Resource_List.vntype'] = 'cray_compute'

        PBSObject.__init__(self, None, attrs, self.dflt_attributes)

        if jobname is not None:
            self.custom_attrs[ATTR_N] = jobname
            self.attributes[ATTR_N] = jobname
        self.set_variable_list(self.username)
        self.set_sleep_time(100)

    def __del__(self):
        del self.__dict__

    def add_cray_vntype(self, select=None):
        """
        Cray specific function to add vntype as ``cray_compute`` to each
        select chunk

        :param select: PBS select statement
        :type select: str or None
        """
        ra = []
        r = select.split('+')
        for i in r:
            select = PbsTypeSelect(i)
            novntype = 'vntype' not in select.resources
            nohost = 'host' not in select.resources
            novnode = 'vnode' not in select.resources
            if novntype and nohost and novnode:
                i = i + ":vntype=cray_compute"
            ra.append(i)
        select_str = ''
        for l in ra:
            select_str = select_str + "+" + l
        select_str = select_str[1:]
        return select_str

    def set_attributes(self, a={}):
        """
        Set attributes and custom attributes on this job.
        Custom attributes are used when converting attributes to CLI.
        On the Cray platform, if 'Resource_List.vntype' is already set,
        remove it and add the vntype value to each chunk of the
        select statement.

        :param a: Attribute dictionary
        :type a: Dictionary
        """
        if isinstance(a, list):
            a = OrderedDict(a)

        self.attributes = OrderedDict(
            list(self.dflt_attributes.items()) +
            list(self.attributes.items()) + list(a.items()))

        if self.platform == 'cray' or self.platform == 'craysim':
            s = 'Resource_List.select' in a
            v = 'Resource_List.vntype' in self.custom_attrs
            if s and v:
                del self.custom_attrs['Resource_List.vntype']
                select = a['Resource_List.select']
                a['Resource_List.select'] = self.add_cray_vntype(select)

        self.custom_attrs = OrderedDict(
            list(self.custom_attrs.items()) + list(a.items()))

    def set_variable_list(self, user=None, workdir=None):
        """
        Customize the ``Variable_List`` job attribute to ``<user>``
        """
        if user is None:
            userinfo = pwd.getpwuid(os.getuid())
            user = userinfo[0]
            homedir = userinfo[5]
        else:
            try:
                homedir = pwd.getpwnam(user)[5]
            except KeyError:
                homedir = ""

        self.username = user

        s = ['PBS_O_HOME=' + homedir]
        s += ['PBS_O_LANG=en_US.UTF-8']
        s += ['PBS_O_LOGNAME=' + user]
        s += ['PBS_O_PATH=/usr/bin:/bin:/usr/bin:/usr/local/bin']
        s += ['PBS_O_MAIL=/var/spool/mail/' + user]
        s += ['PBS_O_SHELL=/bin/bash']
        s += ['PBS_O_SYSTEM=Linux']
        if workdir is not None:
            wd = workdir
        else:
            wd = os.getcwd()
        s += ['PBS_O_WORKDIR=' + str(wd)]

        self.attributes[ATTR_v] = ",".join(s)
        self.set_attributes()

    def set_sleep_time(self, duration):
        """
        Set the sleep duration for this job.

        :param duration: The duration, in seconds, to sleep
        :type duration: int
        """
        self.set_execargs('/bin/sleep', duration)

    def set_execargs(self, executable, arguments=None):
        """
        Set the executable and arguments to use for this job

        :param executable: path to an executable. No checks are made.
        :type executable: str
        :param arguments: arguments to executable.
        :type arguments: str or list or int
        """
        msg = ['job: executable set to ' + str(executable)]
        if arguments is not None:
            msg += [' with arguments: ' + str(arguments)]

        self.logger.info("".join(msg))
        self.attributes[ATTR_executable] = executable
        if arguments is not None:
            args = ''
            xml_beginargs = '<jsdl-hpcpa:Argument>'
            xml_endargs = '</jsdl-hpcpa:Argument>'
            if isinstance(arguments, list):
                for a in arguments:
                    args += xml_beginargs + str(a) + xml_endargs
            elif isinstance(arguments, str):
                args = xml_beginargs + arguments + xml_endargs
            elif isinstance(arguments, int):
                args = xml_beginargs + str(arguments) + xml_endargs
            self.attributes[ATTR_Arglist] = args
        else:
            self.unset_attributes([ATTR_Arglist])
        self.set_attributes()

    def create_script(self, body=None, asuser=None, hostname=None):
        """
        Create a job script from a given body of text into a
        temporary location

        :param body: the body of the script
        :type body: str or None
        :param asuser: Optionally the user to own this script,
                      defaults to the current user
        :type asuser: str or None
        :param hostname: The host on which the job script is to
                         be created
        :type hostname: str or None
        """

        if body is None:
            return None

        if isinstance(body, list):
            body = '\n'.join(body)

        if self.platform == 'cray' or self.platform == 'craysim':
            body = body.split("\n")
            for i, line in enumerate(body):
                if line.startswith("#PBS") and "select=" in line:
                    if 'Resource_List.vntype' in self.attributes:
                        self.unset_attributes(['Resource_List.vntype'])
                    line_arr = line.split(" ")
                    for j, element in enumerate(line_arr):
                        select = element.startswith("select=")
                        lselect = element.startswith("-lselect=")
                        if select or lselect:
                            if lselect:
                                sel_str = element[9:]
                            else:
                                sel_str = element[7:]
                            sel_str = self.add_cray_vntype(select=sel_str)
                            if lselect:
                                line_arr[j] = "-lselect=" + sel_str
                            else:
                                line_arr[j] = "select=" + sel_str
                    body[i] = " ".join(line_arr)
            body = '\n'.join(body)

        # If the user has a userhost, the job will run from there
        # so the script should be made there
        if self.username:
            user = PbsUser.get_user(self.username)
            if user.host:
                hostname = user.host
                asuser = user.name

        self.script_body = body
        if self.du is None:
            self.du = DshUtils()
        # First create the temporary file as current user and only change
        # its mode once the current user has written to it
        fn = self.du.create_temp_file(hostname,
                                      prefix='PtlPbsJobScript',
                                      asuser=asuser,
                                      body=body)
        self.du.chmod(hostname, fn, mode=0o755)
        self.script = fn
        return fn

    def create_subjob_id(self, job_array_id, subjob_index):
        """
        insert subjob index into the square brackets of job array id

        :param job_array_id: PBS parent array job id
        :type job_array_id: str
        :param subjob_index: index of subjob
        :type subjob_index: int
        :returns: subjob id string
        """
        idx = job_array_id.find('[]')
        return job_array_id[:idx + 1] + str(subjob_index) + \
            job_array_id[idx + 1:]

    def create_eatcpu_job(self, duration=None, hostname=None):
        """
        Create a job that eats cpu indefinitely or for the given
        duration of time

        :param duration: The duration, in seconds, to keep the CPU busy
        :type duration: int
        :param hostname: hostname on which to execute the job
        :type hostname: str or None
        """
        if self.du is None:
            self.du = DshUtils()
        shebang_line = '#!' + self.du.which(hostname, exe='python3')
        body = """
import signal
import sys

x = 0


def receive_alarm(signum, stack):
    sys.exit()

signal.signal(signal.SIGALRM, receive_alarm)

if (len(sys.argv) > 1):
    input_time = sys.argv[1]
    print('Terminating after %s seconds' % input_time)
    signal.alarm(int(input_time))
else:
    print('Running indefinitely')

while True:
    x += 1
"""
        script_body = shebang_line + body
        script_path = self.du.create_temp_file(hostname=hostname,
                                               body=script_body,
                                               suffix='.py')
        if not self.du.is_localhost(hostname):
            d = pwd.getpwnam(self.username).pw_dir
            ret = self.du.run_copy(hosts=hostname, src=script_path, dest=d)
            if ret is None or ret['rc'] != 0:
                raise AssertionError("Failed to copy file %s to %s" %
                                     (script_path, hostname))
        pbs_conf = self.du.parse_pbs_config(hostname)
        shell_path = os.path.join(pbs_conf['PBS_EXEC'], 'bin', 'pbs_python')
        a = {ATTR_S: shell_path}
        self.set_attributes(a)
        mode = 0o755
        if not self.du.chmod(
                hostname=hostname, path=script_path, mode=mode, sudo=True):
            raise AssertionError("Failed to set permissions for file %s"
                                 " to %s" % (script_path, oct(mode)))
        self.set_execargs(script_path, duration)
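
A short usage sketch (assumed, not part of the original examples; the select
string and script body are illustrative): build a sleep job for TEST_USER and
optionally materialize its job script.

j = Job(TEST_USER, attrs={'Resource_List.select': '1:ncpus=2'})
j.set_sleep_time(30)  # equivalent to set_execargs('/bin/sleep', 30)
script_path = j.create_script(['#PBS -N demo', 'sleep 30'])
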
Example #4
class PTLTestData(Plugin):
    """
    Save post analysis data on test case failure or error
    """
    name = 'PTLTestData'
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide a different directory'
            self.logger.warning(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host,
                      path=datadir,
                      mode=0o755,
                      parents=True,
                      logerr=False,
                      level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0)
                and (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite has exceeded the max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        svr = getattr(_test, 'server', None)
        if svr is not None:
            svr_host = svr.hostname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'], 'unsupported',
                                'pbs_diag')
        cmd = [pbs_diag, '-f', '-d', '2']
        cmd += ['-u', self.du.get_current_user()]
        if len(svr.jobs) > 0:
            cmd += ['-j', ','.join(svr.jobs.keys())]
        ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to get diag information '
            _msg += 'on %s:' % svr_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in below output:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                f.close()
                return
        diag_out_dest = os.path.join(datadir, os.path.basename(diag_out))
        if not self.du.is_localhost(svr_host):
            diag_out_r = svr_host + ':' + diag_out
        else:
            diag_out_r = diag_out
        ret = self.du.run_copy(current_host,
                               diag_out_r,
                               diag_out_dest,
                               sudo=True,
                               level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to copy generated diag from'
            _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            self.du.rm(svr_host,
                       path=diag_out,
                       sudo=True,
                       force=True,
                       level=logging.DEBUG2)
        cores = []
        dir_list = ['server_priv', 'sched_priv', 'mom_priv']
        for d in dir_list:
            path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
            files = self.du.listdir(hostname=svr_host,
                                    path=path,
                                    sudo=True,
                                    level=logging.DEBUG2)
            for _f in files:
                if os.path.basename(_f).startswith('core'):
                    cores.append(_f)
        cores = list(set(cores))
        if len(cores) > 0:
            cmd = ['gunzip', diag_out_dest]
            ret = self.du.run_cmd(current_host,
                                  cmd,
                                  sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to unzip generated diag at %s:' % diag_out_dest
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            diag_out_dest = diag_out_dest.rstrip('.gz')
            cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
            ret = self.du.run_cmd(current_host,
                                  cmd,
                                  sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to extract generated diag %s' % diag_out_dest
                _msg += ' to %s:' % datadir
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(hostname=current_host,
                       path=diag_out_dest,
                       force=True,
                       sudo=True,
                       level=logging.DEBUG2)
            diag_out_dest = diag_out_dest.rstrip('.tar')
            for c in cores:
                cmd = [pbs_diag, '-g', c]
                ret = self.du.run_cmd(svr_host,
                                      cmd,
                                      sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed to get core file information for '
                    _msg += '%s on %s:' % (c, svr_host)
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                else:
                    of = os.path.join(diag_out_dest,
                                      os.path.basename(c) + '.out')
                    _f = open(of, 'w+')
                    _f.write('\n'.join(ret['out']) + '\n')
                    _f.close()
                    self.du.rm(hostname=svr_host,
                               path=c,
                               force=True,
                               sudo=True,
                               level=logging.DEBUG2)
            cmd = ['tar', '-cf', diag_out_dest + '.tar']
            cmd += [os.path.basename(diag_out_dest)]
            ret = self.du.run_cmd(current_host,
                                  cmd,
                                  sudo=True,
                                  cwd=datadir,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to generate tarball of diag directory'
                _msg += ' %s' % diag_out_dest
                _msg += ' after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            cmd = ['gzip', diag_out_dest + '.tar']
            ret = self.du.run_cmd(current_host,
                                  cmd,
                                  sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to compress tarball of diag %s' % diag_out_dest
                _msg += '.tar after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(current_host,
                       diag_out_dest,
                       sudo=True,
                       recursive=True,
                       force=True,
                       level=logging.DEBUG2)
        else:
            diag_out_dest = diag_out_dest.rstrip('.tar.gz')
        dest = os.path.join(datadir,
                            'PBS_' + current_host.split('.')[0] + '.tar.gz')
        ret = self.du.run_copy(current_host,
                               diag_out_dest + '.tar.gz',
                               dest,
                               sudo=True,
                               level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to rename tarball of diag from %s' % diag_out_dest
            _msg += '.tar.gz to %s:' % dest
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        self.du.rm(current_host,
                   path=diag_out_dest + '.tar.gz',
                   force=True,
                   sudo=True,
                   level=logging.DEBUG2)
        f.close()
        self.__save_data_count += 1
        _msg = 'Successfully saved post analysis data'
        self.logger.log(logging.DEBUG2, _msg)

    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')
class PTLTestData(Plugin):

    """
    Save post analysis data on test case failure or error
    """
    name = 'PTLTestData'
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide a different directory'
            self.logger.warning(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0o755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0) and
                (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite has exceeded the max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return

        servers = getattr(_test, 'servers', None)
        if servers is not None:
            server_host = servers.values()[0].shortname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        moms = getattr(_test, 'moms', None)
        comms = getattr(_test, 'comms', None)
        client = getattr(_test.servers.values()[0], 'client', None)
        server = servers.values()[0]
        add_hosts = []
        if len(servers) > 1:
            for param in servers.values()[1:]:
                add_hosts.append(param.shortname)
        if moms is not None:
            for param in moms.values():
                add_hosts.append(param.shortname)
        if comms is not None:
            for param in comms.values():
                add_hosts.append(param.shortname)
        if client is not None:
            add_hosts.append(client.split('.')[0])

        add_hosts = list(set(add_hosts) - set([server_host]))

        pbs_snapshot_path = os.path.join(
            server.pbs_conf["PBS_EXEC"], "sbin", "pbs_snapshot")
        cur_user = self.du.get_current_user()
        cur_user_dir = pwd.getpwnam(cur_user).pw_dir
        cmd = [
            pbs_snapshot_path,
            '-H', server_host,
            '--daemon-logs',
            '2',
            '--accounting-logs',
            '2',
            '--with-sudo'
            ]
        if len(add_hosts) > 0:
            cmd += ['--additional-hosts=' + ','.join(add_hosts)]
        cmd += ['-o', cur_user_dir]
        ret = self.du.run_cmd(current_host, cmd, level=logging.DEBUG2,
                              logerr=False)
        if ret['rc'] != 0:
            _msg = 'Failed to get analysis information '
            _msg += 'on %s:' % server_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            if len(ret['out']) == 0:
                self.logger.error('Snapshot command failed')
                f.close()
                return

        snap_out = ret['out'][0]
        snap_out_dest = (snap_out.split(":")[1]).strip()

        dest = os.path.join(datadir,
                            'PBS_' + server_host + '.tar.gz')
        ret = self.du.run_copy(current_host, snap_out_dest,
                               dest, sudo=True, level=logging.DEBUG2)
        self.du.rm(current_host, path=snap_out_dest,
                   recursive=True, force=True, level=logging.DEBUG2)

        f.close()
        self.__save_data_count += 1
        _msg = 'Saved post analysis data'
        self.logger.info(_msg)

    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')
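
A registration sketch (illustrative: it assumes the plain nose entry point and
a hypothetical test path, whereas PTL's own runner wires its plugins up
internally).

import nose

plugin = PTLTestData()
plugin.set_data('/tmp/ptl_post_data', max_postdata_threshold=0)  # 0 disables the per-suite threshold check
nose.core.TestProgram(argv=['nosetests', 'ptl/tests'], addplugins=[plugin])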