Ejemplo n.º 1
0
    def _cleanup(self):
        self.logger.info('Cleaning up temporary files')
        du = DshUtils()
        root_dir = os.sep
        dirlist = set([os.path.join(root_dir, 'tmp'),
                       os.path.join(root_dir, 'var', 'tmp')])
        # get tmp dir from the environment
        for envname in 'TMPDIR', 'TEMP', 'TMP':
            dirname = os.getenv(envname)
            if dirname:
                dirlist.add(dirname)

        p = re.compile(r'^pbs\.\d+')
        for tmpdir in dirlist:
            # list the contents of each tmp dir and
            # get the file list to be deleted
            self.logger.info('Cleaning up ' + tmpdir + ' dir')
            ftd = []
            files = du.listdir(path=tmpdir)
            bn = os.path.basename
            ftd.extend([f for f in files if bn(f).startswith('PtlPbs')])
            ftd.extend([f for f in files if bn(f).startswith('STDIN')])
            ftd.extend([f for f in files if bn(f).startswith('pbsscrpt')])
            ftd.extend([f for f in files if bn(f).startswith('pbs.conf.')])
            ftd.extend([f for f in files if p.match(bn(f))])
            for f in ftd:
                du.rm(path=f, sudo=True, recursive=True, force=True,
                      level=logging.DEBUG)
        tmpdir = tempfile.gettempdir()
        os.chdir(tmpdir)
        tmppath = os.path.join(tmpdir, 'dejagnutemp%s' % os.getpid())
        if du.isdir(path=tmppath):
            du.rm(path=tmppath, recursive=True, sudo=True, force=True,
                  level=logging.DEBUG)
Ejemplo n.º 2
0
 def _cleanup(self):
     self.logger.info('Cleaning up temporary files')
     du = DshUtils()
     tmpdir = tempfile.gettempdir()
     ftd = []
     if tmpdir:
         files = du.listdir(path=tmpdir)
         bn = os.path.basename
         ftd.extend([f for f in files if bn(f).startswith('PtlPbs')])
         ftd.extend([f for f in files if bn(f).startswith('STDIN')])
         for f in ftd:
             du.rm(path=f, sudo=True, recursive=True, force=True,
                   level=logging.DEBUG)
     os.chdir(tmpdir)
     tmppath = os.path.join(tmpdir, 'dejagnutemp%s' % os.getpid())
     if du.isdir(path=tmppath):
         du.rm(path=tmppath, recursive=True, sudo=True, force=True,
               level=logging.DEBUG)
Ejemplo n.º 3
0
 def _cleanup(self):
     self.logger.info('Cleaning up temporary files')
     du = DshUtils()
     tmpdir = tempfile.gettempdir()
     ftd = []
     if tmpdir:
         files = du.listdir(path=tmpdir)
         bn = os.path.basename
         ftd.extend([f for f in files if bn(f).startswith('PtlPbs')])
         ftd.extend([f for f in files if bn(f).startswith('STDIN')])
         for f in ftd:
             du.rm(path=f, sudo=True, recursive=True, force=True,
                   level=logging.DEBUG)
     os.chdir(tmpdir)
     tmppath = os.path.join(tmpdir, 'dejagnutemp%s' % os.getpid())
     if du.isdir(path=tmppath):
         du.rm(path=tmppath, recursive=True, sudo=True, force=True,
               level=logging.DEBUG)
Ejemplo n.º 4
0
    def check_hardware_status_and_core_files(self, test):
        """
        function checks hardware status and core files
        every 5 minutes
        """
        du = DshUtils()
        systems = list(self.param_dict['servers'])
        systems.extend(self.param_dict['moms'])
        systems.extend(self.param_dict['comms'])
        systems = list(set(systems))

        if hasattr(test, 'test'):
            _test = test.test
        elif hasattr(test, 'context'):
            _test = test.context
        else:
            return None

        for name in ['servers', 'moms', 'comms', 'clients']:
            mlist = None
            if (hasattr(_test, name)
                    and (getattr(_test, name, None) is not None)):
                mlist = getattr(_test, name).values()
            if mlist:
                for mc in mlist:
                    platform = mc.platform
                    if ((platform not in ['linux', 'shasta', 'cray'])
                            and (mc.hostname in systems)):
                        systems.remove(mc.hostname)

        self.hardware_report_timer = Timer(
            300, self.check_hardware_status_and_core_files, args=(test, ))
        self.hardware_report_timer.start()

        for hostname in systems:
            hr = SystemInfo()
            hr.get_system_info(hostname)
            # monitors disk
            used_disk_percent = getattr(hr, 'system_disk_used_percent', None)
            if used_disk_percent is None:
                _msg = hostname
                _msg += ": unable to get disk info"
                self.hardware_report_timer.cancel()
                raise SkipTest(_msg)
            elif 70 <= used_disk_percent < 95:
                _msg = hostname + ": disk usage is at "
                _msg += str(used_disk_percent) + "%"
                _msg += ", disk cleanup is recommended."
                self.logger.warning(_msg)
            elif used_disk_percent >= 95:
                _msg = hostname + ":disk usage > 95%, skipping the test(s)"
                self.hardware_report_timer.cancel()
                raise SkipTest(_msg)
            # checks for core files
            pbs_conf = du.parse_pbs_config(hostname)
            mom_priv_path = os.path.join(pbs_conf["PBS_HOME"], "mom_priv")
            if du.isdir(hostname=hostname, path=mom_priv_path):
                mom_priv_files = du.listdir(hostname=hostname,
                                            path=mom_priv_path,
                                            sudo=True,
                                            fullpath=False)
                if fnmatch.filter(mom_priv_files, "core*"):
                    _msg = hostname + ": core files found in "
                    _msg += mom_priv_path
                    self.logger.warning(_msg)
            server_priv_path = os.path.join(pbs_conf["PBS_HOME"],
                                            "server_priv")
            if du.isdir(hostname=hostname, path=server_priv_path):
                server_priv_files = du.listdir(hostname=hostname,
                                               path=server_priv_path,
                                               sudo=True,
                                               fullpath=False)
                if fnmatch.filter(server_priv_files, "core*"):
                    _msg = hostname + ": core files found in "
                    _msg += server_priv_path
                    self.logger.warning(_msg)
            sched_priv_path = os.path.join(pbs_conf["PBS_HOME"], "sched_priv")
            if du.isdir(hostname=hostname, path=sched_priv_path):
                sched_priv_files = du.listdir(hostname=hostname,
                                              path=sched_priv_path,
                                              sudo=True,
                                              fullpath=False)
                if fnmatch.filter(sched_priv_files, "core*"):
                    _msg = hostname + ": core files found in "
                    _msg += sched_priv_path
                    self.logger.warning(_msg)
            for u in PBS_ALL_USERS:
                user_home_files = du.listdir(hostname=hostname,
                                             path=u.home,
                                             sudo=True,
                                             fullpath=False,
                                             runas=u.name)
                if user_home_files and fnmatch.filter(user_home_files,
                                                      "core*"):
                    _msg = hostname + ": user-" + str(u)
                    _msg += ": core files found in "
                    self.logger.warning(_msg + u.home)
Ejemplo n.º 5
0
 def check_hardware_status_and_core_files(self):
     """
     function checks hardware status and core files
     every 5 minutes
     """
     du = DshUtils()
     self.hardware_report_timer = Timer(
         300, self.check_hardware_status_and_core_files)
     self.hardware_report_timer.start()
     systems = list(self.param_dict['servers'])
     systems.extend(self.param_dict['moms'])
     systems.extend(self.param_dict['comms'])
     systems = list(set(systems))
     for hostname in systems:
         hr = SystemInfo()
         hr.get_system_info(hostname)
         # monitors disk
         used_disk_percent = getattr(hr,
                                     'system_disk_used_percent', None)
         if used_disk_percent is None:
             _msg = hostname
             _msg += ": unable to get disk info"
             self.hardware_report_timer.cancel()
             raise SkipTest(_msg)
         elif 70 <= used_disk_percent < 95:
             _msg = hostname + ": disk usage is at "
             _msg += str(used_disk_percent) + "%"
             _msg += ", disk cleanup is recommended."
             self.logger.warning(_msg)
         elif used_disk_percent >= 95:
             _msg = hostname + ":disk usage > 95%, skipping the test(s)"
             self.hardware_report_timer.cancel()
             raise SkipTest(_msg)
         # checks for core files
         pbs_conf = du.parse_pbs_config(hostname)
         mom_priv_path = os.path.join(pbs_conf["PBS_HOME"], "mom_priv")
         if du.isdir(hostname=hostname, path=mom_priv_path):
             mom_priv_files = du.listdir(
                 hostname=hostname,
                 path=mom_priv_path,
                 sudo=True,
                 fullpath=False)
             if fnmatch.filter(mom_priv_files, "core*"):
                 _msg = hostname + ": core files found in "
                 _msg += mom_priv_path
                 self.logger.warning(_msg)
         server_priv_path = os.path.join(
             pbs_conf["PBS_HOME"], "server_priv")
         if du.isdir(hostname=hostname, path=server_priv_path):
             server_priv_files = du.listdir(
                 hostname=hostname,
                 path=server_priv_path,
                 sudo=True,
                 fullpath=False)
             if fnmatch.filter(server_priv_files, "core*"):
                 _msg = hostname + ": core files found in "
                 _msg += server_priv_path
                 self.logger.warning(_msg)
         sched_priv_path = os.path.join(pbs_conf["PBS_HOME"], "sched_priv")
         if du.isdir(hostname=hostname, path=sched_priv_path):
             sched_priv_files = du.listdir(
                 hostname=hostname,
                 path=sched_priv_path,
                 sudo=True,
                 fullpath=False)
             if fnmatch.filter(sched_priv_files, "core*"):
                 _msg = hostname + ": core files found in "
                 _msg += sched_priv_path
                 self.logger.warning(_msg)
         for u in PBS_ALL_USERS:
             user_home_files = du.listdir(hostname=hostname, path=u.home,
                                          sudo=True, fullpath=False,
                                          runas=u.name)
             if user_home_files and fnmatch.filter(
                     user_home_files, "core*"):
                 _msg = hostname + ": user-" + str(u)
                 _msg += ": core files found in "
                 self.logger.warning(_msg + u.home)
Ejemplo n.º 6
0
class PTLTestData(Plugin):

    """
    Save post analysis data on test cases failure or error
    """
    name = 'PTLTestData'
    score = sys.maxint - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warn(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warn(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0) and
                (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite is exceeded max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        svr = getattr(_test, 'server', None)
        if svr is not None:
            svr_host = svr.hostname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'],
                                'unsupported', 'pbs_diag')
        cur_user = self.du.get_current_user()
        cmd = [pbs_diag, '-f', '-d', '2']
        cmd += ['-u', cur_user]
        cmd += ['-o', pwd.getpwnam(cur_user).pw_dir]
        if len(svr.jobs) > 0:
            cmd += ['-j', ','.join(svr.jobs.keys())]
        ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to get diag information for '
            _msg += 'on %s:' % svr_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in below output:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                f.close()
                return
        diag_out_dest = os.path.join(datadir, os.path.basename(diag_out))
        if not self.du.is_localhost(svr_host):
            diag_out_r = svr_host + ':' + diag_out
        else:
            diag_out_r = diag_out
        ret = self.du.run_copy(current_host, diag_out_r, diag_out_dest,
                               sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to copy generated diag from'
            _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            self.du.rm(svr_host, path=diag_out, sudo=True, force=True,
                       level=logging.DEBUG2)
        cores = []
        dir_list = ['server_priv', 'sched_priv', 'mom_priv']
        for d in dir_list:
            path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
            files = self.du.listdir(hostname=svr_host, path=path, sudo=True,
                                    level=logging.DEBUG2)
            for _f in files:
                if os.path.basename(_f).startswith('core'):
                    cores.append(_f)
        cores = list(set(cores))
        if len(cores) > 0:
            cmd = ['gunzip', diag_out_dest]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed unzip generated diag at %s:' % diag_out_dest
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            diag_out_dest = diag_out_dest.rstrip('.gz')
            cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed extract generated diag %s' % diag_out_dest
                _msg += ' to %s:' % datadir
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(hostname=current_host, path=diag_out_dest,
                       force=True, sudo=True, level=logging.DEBUG2)
            diag_out_dest = diag_out_dest.rstrip('.tar')
            for c in cores:
                cmd = [pbs_diag, '-g', c]
                ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed to get core file information for '
                    _msg += '%s on %s:' % (c, svr_host)
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                else:
                    of = os.path.join(diag_out_dest,
                                      os.path.basename(c) + '.out')
                    _f = open(of, 'w+')
                    _f.write('\n'.join(ret['out']) + '\n')
                    _f.close()
                    self.du.rm(hostname=svr_host, path=c, force=True,
                               sudo=True, level=logging.DEBUG2)
            cmd = ['tar', '-cf', diag_out_dest + '.tar']
            cmd += [os.path.basename(diag_out_dest)]
            ret = self.du.run_cmd(current_host, cmd, sudo=True, cwd=datadir,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed generate tarball of diag directory'
                _msg += ' %s' % diag_out_dest
                _msg += ' after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            cmd = ['gzip', diag_out_dest + '.tar']
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed compress tarball of diag %s' % diag_out_dest
                _msg += '.tar after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(current_host, diag_out_dest, sudo=True,
                       recursive=True, force=True, level=logging.DEBUG2)
        else:
            diag_out_dest = diag_out_dest.rstrip('.tar.gz')
        dest = os.path.join(datadir,
                            'PBS_' + current_host.split('.')[0] + '.tar.gz')
        ret = self.du.run_copy(current_host, diag_out_dest + '.tar.gz',
                               dest, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed rename tarball of diag from %s' % diag_out_dest
            _msg += '.tar.gz to %s:' % dest
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        self.du.rm(current_host, path=diag_out_dest + '.tar.gz',
                   force=True, sudo=True, level=logging.DEBUG2)
        f.close()
        self.__save_data_count += 1
        _msg = 'Successfully saved post analysis data'
        self.logger.log(logging.DEBUG2, _msg)

    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')
Ejemplo n.º 7
0
class PTLTestData(Plugin):
    """
    Save post analysis data on test cases failure or error
    """
    name = 'PTLTestData'
    score = sys.maxint - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warn(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warn(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host,
                      path=datadir,
                      mode=0755,
                      parents=True,
                      logerr=False,
                      level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0)
                and (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite is exceeded max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        svr = getattr(_test, 'server', None)
        if svr is not None:
            svr_host = svr.hostname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'], 'unsupported',
                                'pbs_diag')
        cmd = [pbs_diag, '-f', '-d', '2']
        cmd += ['-u', self.du.get_current_user()]
        if len(svr.jobs) > 0:
            cmd += ['-j', ','.join(svr.jobs.keys())]
        ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to get diag information for '
            _msg += 'on %s:' % svr_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in below output:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                f.close()
                return
        diag_out_dest = os.path.join(datadir, os.path.basename(diag_out))
        if not self.du.is_localhost(svr_host):
            diag_out_r = svr_host + ':' + diag_out
        else:
            diag_out_r = diag_out
        ret = self.du.run_copy(current_host,
                               diag_out_r,
                               diag_out_dest,
                               sudo=True,
                               level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to copy generated diag from'
            _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            self.du.rm(svr_host,
                       path=diag_out,
                       sudo=True,
                       force=True,
                       level=logging.DEBUG2)
        cores = []
        dir_list = ['server_priv', 'sched_priv', 'mom_priv']
        for d in dir_list:
            path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
            files = self.du.listdir(hostname=svr_host,
                                    path=path,
                                    sudo=True,
                                    level=logging.DEBUG2)
            for _f in files:
                if os.path.basename(_f).startswith('core'):
                    cores.append(_f)
        cores = list(set(cores))
        if len(cores) > 0:
            cmd = ['gunzip', diag_out_dest]
            ret = self.du.run_cmd(current_host,
                                  cmd,
                                  sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed unzip generated diag at %s:' % diag_out_dest
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            diag_out_dest = diag_out_dest.rstrip('.gz')
            cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
            ret = self.du.run_cmd(current_host,
                                  cmd,
                                  sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed extract generated diag %s' % diag_out_dest
                _msg += ' to %s:' % datadir
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(hostname=current_host,
                       path=diag_out_dest,
                       force=True,
                       sudo=True,
                       level=logging.DEBUG2)
            diag_out_dest = diag_out_dest.rstrip('.tar')
            for c in cores:
                cmd = [pbs_diag, '-g', c]
                ret = self.du.run_cmd(svr_host,
                                      cmd,
                                      sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed to get core file information for '
                    _msg += '%s on %s:' % (c, svr_host)
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                else:
                    of = os.path.join(diag_out_dest,
                                      os.path.basename(c) + '.out')
                    _f = open(of, 'w+')
                    _f.write('\n'.join(ret['out']) + '\n')
                    _f.close()
                    self.du.rm(hostname=svr_host,
                               path=c,
                               force=True,
                               sudo=True,
                               level=logging.DEBUG2)
            cmd = ['tar', '-cf', diag_out_dest + '.tar']
            cmd += [os.path.basename(diag_out_dest)]
            ret = self.du.run_cmd(current_host,
                                  cmd,
                                  sudo=True,
                                  cwd=datadir,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed generate tarball of diag directory'
                _msg += ' %s' % diag_out_dest
                _msg += ' after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            cmd = ['gzip', diag_out_dest + '.tar']
            ret = self.du.run_cmd(current_host,
                                  cmd,
                                  sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed compress tarball of diag %s' % diag_out_dest
                _msg += '.tar after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(current_host,
                       diag_out_dest,
                       sudo=True,
                       recursive=True,
                       force=True,
                       level=logging.DEBUG2)
        else:
            diag_out_dest = diag_out_dest.rstrip('.tar.gz')
        dest = os.path.join(datadir,
                            'PBS_' + current_host.split('.')[0] + '.tar.gz')
        ret = self.du.run_copy(current_host,
                               diag_out_dest + '.tar.gz',
                               dest,
                               sudo=True,
                               level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed rename tarball of diag from %s' % diag_out_dest
            _msg += '.tar.gz to %s:' % dest
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        self.du.rm(current_host,
                   path=diag_out_dest + '.tar.gz',
                   force=True,
                   sudo=True,
                   level=logging.DEBUG2)
        f.close()
        self.__save_data_count += 1
        _msg = 'Successfully saved post analysis data'
        self.logger.log(logging.DEBUG2, _msg)

    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')
Ejemplo n.º 8
0
    def _cleanup(self):
        self.logger.info('Cleaning up temporary files')
        du = DshUtils()
        hosts = set(self.param_dict['moms']).union(
            set(self.param_dict['servers']))
        for user in PBS_USERS:
            self.logger.debug('Cleaning %s\'s home directory' % (str(user)))
            runas = PbsUser.get_user(user)
            for host in hosts:
                ret = du.run_cmd(host, cmd=['echo', '$HOME'], sudo=True,
                                 runas=runas, logerr=False, as_script=True,
                                 level=logging.DEBUG)
                if ret['rc'] == 0:
                    path = ret['out'][0].strip()
                else:
                    return None
                ftd = []
                files = du.listdir(host, path=path, runas=user,
                                   level=logging.DEBUG)
                bn = os.path.basename
                ftd.extend([f for f in files if bn(f).startswith('PtlPbs')])
                ftd.extend([f for f in files if bn(f).startswith('STDIN')])

                if len(ftd) > 1000:
                    for i in range(0, len(ftd), 1000):
                        j = i + 1000
                        du.rm(host, path=ftd[i:j], runas=user,
                              force=True, level=logging.DEBUG)

        root_dir = os.sep
        dirlist = set([os.path.join(root_dir, 'tmp'),
                       os.path.join(root_dir, 'var', 'tmp')])
        # get tmp dir from the environment
        for envname in 'TMPDIR', 'TEMP', 'TMP':
            dirname = os.getenv(envname)
            if dirname:
                dirlist.add(dirname)

        p = re.compile(r'^pbs\.\d+')
        for tmpdir in dirlist:
            # list the contents of each tmp dir and
            # get the file list to be deleted
            self.logger.info('Cleaning up ' + tmpdir + ' dir')
            ftd = []
            files = du.listdir(path=tmpdir)
            bn = os.path.basename
            ftd.extend([f for f in files if bn(f).startswith('PtlPbs')])
            ftd.extend([f for f in files if bn(f).startswith('STDIN')])
            ftd.extend([f for f in files if bn(f).startswith('pbsscrpt')])
            ftd.extend([f for f in files if bn(f).startswith('pbs.conf.')])
            ftd.extend([f for f in files if p.match(bn(f))])
            for f in ftd:
                du.rm(path=f, sudo=True, recursive=True, force=True,
                      level=logging.DEBUG)
        for f in du.tmpfilelist:
            du.rm(path=f, sudo=True, force=True, level=logging.DEBUG)
        del du.tmpfilelist[:]
        tmpdir = tempfile.gettempdir()
        os.chdir(tmpdir)
        tmppath = os.path.join(tmpdir, 'dejagnutemp%s' % os.getpid())
        if du.isdir(path=tmppath):
            du.rm(path=tmppath, recursive=True, sudo=True, force=True,
                  level=logging.DEBUG)