def priv_ports_info(hostname=None):
    """
    Return a list of privileged ports in use on a given host

    :param hostname: The host on which to query privileged ports usage.
                     Defaults to the local host
    :type hostname: str or None
    :returns: A list of report lines on success, False if the netstat
              command fails
    """
    from ptl.utils.pbs_dshutils import DshUtils
    netstat_tag = re.compile(r"tcp[\s]+[\d]+[\s]+[\d]+[\s]+"
                             r"(?P<srchost>[\w\*\.]+):(?P<srcport>[\d]+)"
                             r"[\s]+(?P<desthost>[\.\w\*:]+):"
                             r"(?P<destport>[\d]+)"
                             r"[\s]+(?P<state>[\w]+).*")
    du = DshUtils()
    ret = du.run_cmd(hostname, ['netstat', '-at', '--numeric-ports'])
    if ret['rc'] != 0:
        return False
    msg = []
    lines = ret['out']
    resv_ports = {}
    source_hosts = []
    for line in lines:
        m = netstat_tag.match(line)
        if m:
            srcport = int(m.group('srcport'))
            srchost = m.group('srchost')
            destport = int(m.group('destport'))
            desthost = m.group('desthost')
            if srcport < 1024:
                if srchost not in source_hosts:
                    source_hosts.append(srchost)
                msg.append(line)
                if srchost not in resv_ports:
                    resv_ports[srchost] = [srcport]
                elif srcport not in resv_ports[srchost]:
                    resv_ports[srchost].append(srcport)
            if destport < 1024:
                msg.append(line)
                if desthost not in resv_ports:
                    resv_ports[desthost] = [destport]
                elif destport not in resv_ports[desthost]:
                    resv_ports[desthost].append(destport)
    if len(resv_ports) > 0:
        msg.append('\nPrivileged ports in use: ')
        for k, v in resv_ports.items():
            msg.append('\t' + k + ': ' + ",".join(str(p) for p in v))
        for sh in source_hosts:
            msg.append('\nTotal on ' + sh + ': ' + str(len(resv_ports[sh])))
    else:
        msg.append('No privileged ports currently allocated')
    return msg
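# Usage sketch (assumption: priv_ports_info is imported from the module that
# defines it). The call below only illustrates the return contract -- a list
# of report lines on success, False when the netstat query fails.
report = priv_ports_info()
if report is False:
    print('netstat query failed on the target host')
else:
    for line in report:
        print(line)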
def __init__(self):
    Plugin.__init__(self)
    self.post_data_dir = None
    self.max_postdata_threshold = None
    self.__save_data_count = 0
    self.__priv_sn = ''
    self.du = DshUtils()
def _cleanup(self):
    self.logger.info('Cleaning up temporary files')
    du = DshUtils()
    root_dir = os.sep
    dirlist = set([os.path.join(root_dir, 'tmp'),
                   os.path.join(root_dir, 'var', 'tmp')])
    # get tmp dir from the environment
    for envname in 'TMPDIR', 'TEMP', 'TMP':
        dirname = os.getenv(envname)
        if dirname:
            dirlist.add(dirname)
    p = re.compile(r'^pbs\.\d+')
    for tmpdir in dirlist:
        # list the contents of each tmp dir and
        # get the file list to be deleted
        self.logger.info('Cleaning up ' + tmpdir + ' dir')
        ftd = []
        files = du.listdir(path=tmpdir)
        bn = os.path.basename
        ftd.extend([f for f in files if bn(f).startswith('PtlPbs')])
        ftd.extend([f for f in files if bn(f).startswith('STDIN')])
        ftd.extend([f for f in files if bn(f).startswith('pbsscrpt')])
        ftd.extend([f for f in files if bn(f).startswith('pbs.conf.')])
        ftd.extend([f for f in files if p.match(bn(f))])
        for f in ftd:
            du.rm(path=f, sudo=True, recursive=True, force=True,
                  level=logging.DEBUG)
    tmpdir = tempfile.gettempdir()
    os.chdir(tmpdir)
    tmppath = os.path.join(tmpdir, 'dejagnutemp%s' % os.getpid())
    if du.isdir(path=tmppath):
        du.rm(path=tmppath, recursive=True, sudo=True, force=True,
              level=logging.DEBUG)
class SyncData(threading.Thread):
    """
    Sync thread
    """

    def __init__(self, sharedpath, queue):
        threading.Thread.__init__(self)
        self.sharedpath = sharedpath
        self.queue = queue
        self._go = True
        self.du = DshUtils()

    def run(self):
        while self._go:
            try:
                host, datadir, bi, sn, hostname, tn, lp = self.queue.get(
                    False, 1.0)
            except Queue.Empty:
                continue
            destdatadir = os.path.join(self.sharedpath, bi, sn, hostname,
                                       tn, lp)
            homedir = os.path.join(datadir, 'PBS_' + host)
            _s = ['#!/bin/bash']
            _s += ['mkdir -p %s' % (destdatadir)]
            _s += ['chmod -R 0755 %s' % (destdatadir)]
            _s += ['cp -rp %s %s' % (homedir, destdatadir)]
            _s += ['cp %s/qstat_tf %s' % (datadir, destdatadir)]
            _s += ['cp %s/pbsnodes %s' % (datadir, destdatadir)]
            _s += ['cp %s/print_server %s' % (datadir, destdatadir)]
            _s += ['cp %s/logfile_* %s' % (datadir, destdatadir)]
            _s += ['cat %s/uptime >> %s/uptime' % (datadir, destdatadir)]
            _s += ['cat %s/vmstat >> %s/vmstat' % (datadir, destdatadir)]
            _s += ['cat %s/netstat >> %s/netstat' % (datadir, destdatadir)]
            _s += ['cat %s/ps >> %s/ps' % (datadir, destdatadir)]
            _s += ['cat %s/df >> %s/df' % (datadir, destdatadir)]
            fd, fn = self.du.mkstemp(host, mode=0o755, body='\n'.join(_s))
            os.close(fd)
            self.du.run_cmd(host, cmd=fn, sudo=True)

    def stop(self):
        self._go = False
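# Illustrative driver for SyncData (the shared path, hostnames and other
# values below are placeholders): each queued item matches the 7-tuple that
# run() unpacks -- (host, datadir, build_info, suite_name, hostname,
# test_name, log_prefix).
sync_queue = Queue.Queue()
syncer = SyncData('/shared/ptl-results', sync_queue)
syncer.daemon = True
syncer.start()
sync_queue.put(('nodeA', '/tmp/ptl-data', '19.0.0', 'SmokeTest',
                'headnode', 'test_submit', 'run1'))
# ... once producers are done and the queue has drained ...
syncer.stop()
syncer.join()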
def _cleanup(self):
    self.logger.info('Cleaning up temporary files')
    du = DshUtils()
    tmpdir = tempfile.gettempdir()
    ftd = []
    if tmpdir:
        files = du.listdir(path=tmpdir)
        bn = os.path.basename
        ftd.extend([f for f in files if bn(f).startswith('PtlPbs')])
        ftd.extend([f for f in files if bn(f).startswith('STDIN')])
        for f in ftd:
            du.rm(path=f, sudo=True, recursive=True, force=True,
                  level=logging.DEBUG)
    os.chdir(tmpdir)
    tmppath = os.path.join(tmpdir, 'dejagnutemp%s' % os.getpid())
    if du.isdir(path=tmppath):
        du.rm(path=tmppath, recursive=True, sudo=True, force=True,
              level=logging.DEBUG)
def __init__(self, sharedpath, queue):
    threading.Thread.__init__(self)
    self.sharedpath = sharedpath
    self.queue = queue
    self._go = True
    self.du = DshUtils()
class PTLTestData(Plugin): """ Save post analysis data on test cases failure or error """ name = 'PTLTestData' score = sys.maxint - 3 logger = logging.getLogger(__name__) def __init__(self): self.sharedpath = None self.du = DshUtils() self.__syncth = None self.__queue = Queue.Queue() def options(self, parser, env): """ Register command line options """ pass def set_data(self, sharedpath): self.sharedpath = sharedpath def configure(self, options, config): """ Configure the plugin and system, based on selected options """ self.config = config self.enabled = True def __get_sntnbi_name(self, test): if hasattr(test, 'test'): _test = test.test sn = _test.__class__.__name__ elif hasattr(test, 'context'): _test = test.context sn = _test.__name__ else: return ('unknown', 'unknown', 'unknown') tn = getattr(_test, '_testMethodName', 'unknown') if (hasattr(_test, 'server') and (getattr(_test, 'server', None) is not None)): bi = _test.server.attributes['pbs_version'] else: bi = 'unknown' return (sn, tn, bi) def __save_home(self, test, status): if hasattr(test, 'test'): _test = test.test elif hasattr(test, 'context'): _test = test.context else: # test does not have any PBS Objects, so just return return if not hasattr(_test, 'server'): # test does not have any PBS Objects, so just return return st = getattr(test, 'start_time', None) if st is not None: st = time.mktime(st.timetuple()) else: st = time.time() st -= 180 # starttime - 3 min et = getattr(test, 'end_time', None) if et is not None: et = time.mktime(et.timetuple()) else: et = time.time() hostname = socket.gethostname().split('.')[0] lp = os.environ.get('PBS_JOBID', time.strftime("%Y%b%d_%H_%m_%S", time.localtime())) sn, tn, bi = self.__get_sntnbi_name(test) if getattr(_test, 'servers', None) is not None: shosts = map(lambda x: x.split('.')[0], _test.servers.host_keys()) else: shosts = [] if getattr(_test, 'schedulers', None) is not None: schosts = map(lambda x: x.split('.')[0], _test.schedulers.host_keys()) else: schosts = [] if getattr(_test, 'moms', None) is not None: mhosts = map(lambda x: x.split('.')[0], _test.moms.host_keys()) else: mhosts = [] hosts = [] hosts.extend(shosts) hosts.extend(schosts) hosts.extend(mhosts) hosts.append(hostname) hosts = sorted(set(hosts)) for host in hosts: confpath = self.du.get_pbs_conf_file(host) tmpdir = self.du.get_tempdir(host) datadir = os.path.join(tmpdir, bi, sn, hostname, tn, lp) _s = ['#!/bin/bash'] _s += ['. 
%s' % (confpath)] _s += ['mkdir -p %s' % (datadir)] _s += ['chmod -R 0755 %s' % (datadir)] if host == _test.server.shortname: _l = '${PBS_EXEC}/bin/qstat -tf > %s/qstat_tf &' % (datadir) _s += [_l] _l = '${PBS_EXEC}/bin/pbsnodes -av > %s/pbsnodes &' % (datadir) _s += [_l] _l = '${PBS_EXEC}/bin/qmgr -c "p s"' _l += ' > %s/print_server &' % (datadir) _s += [_l] _s += ['echo "%s" >> %s/uptime' % ('*' * 80, datadir)] _s += ['echo "On host : %s" >> %s/uptime' % (host, datadir)] _s += ['uptime >> %s/uptime' % (datadir)] _s += ['echo "" >> %s/uptime' % (datadir)] _s += ['echo "%s" >> %s/netstat' % ('*' * 80, datadir)] _s += ['echo "On host : %s" >> %s/netstat' % (host, datadir)] _cmd = self.du.which(host, 'netstat') if _cmd == 'netstat': _cmd = 'ss' if sys.platform.startswith('linux'): _cmd += ' -ap' else: _cmd += ' -an' _s += ['%s >> %s/netstat' % (_cmd, datadir)] _s += ['echo "" >> %s/netstat' % (datadir)] _s += ['echo "%s" >> %s/ps' % ('*' * 80, datadir)] _s += ['echo "On host : %s" >> %s/ps' % (host, datadir)] _s += ['ps -ef | grep pbs_ >> %s/ps' % (datadir)] _s += ['echo "" >> %s/ps' % (datadir)] _s += ['echo "%s" >> %s/df' % ('*' * 80, datadir)] _s += ['echo "On host : %s" >> %s/df' % (host, datadir)] _s += ['df -h >> %s/df' % (datadir)] _s += ['echo "" >> %s/df' % (datadir)] _s += ['echo "%s" >> %s/vmstat' % ('*' * 80, datadir)] _s += ['echo "On host : %s" >> %s/vmstat' % (host, datadir)] _s += ['vmstat >> %s/vmstat' % (datadir)] _s += ['echo "" >> %s/vmstat' % (datadir)] _dst = os.path.join(datadir, 'PBS_' + host) _s += ['cp -rp ${PBS_HOME} %s' % (_dst)] _s += ['tar -cf %s/datastore.tar %s/datastore' % (_dst, _dst)] _s += ['gzip -rf %s/datastore.tar' % (_dst)] _s += ['rm -rf %s/datastore' % (_dst)] _s += ['rm -rf %s/*_logs' % (_dst)] _s += ['rm -rf %s/server_priv/accounting' % (_dst)] _s += ['cp %s %s/pbs.conf.%s' % (confpath, _dst, host)] if host == hostname: _s += ['cat > %s/logfile_%s <<EOF' % (datadir, status)] _s += ['%s' % (getattr(test, 'err_in_string', ''))] _s += [''] _s += ['EOF'] _s += ['wait'] fd, fn = self.du.mkstemp(hostname, mode=0755, body='\n'.join(_s)) os.close(fd) self.du.run_cmd(hostname, cmd=fn, sudo=True, logerr=False) self.du.rm(hostname, fn, force=True, sudo=True) svr = _test.servers[host] if svr is not None: self.__save_logs(svr, _dst, 'server_logs', st, et) _adst = os.path.join(_dst, 'server_priv') self.__save_logs(svr, _adst, 'accounting', st, et) if getattr(_test, 'moms', None) is not None: self.__save_logs(_test.moms[host], _dst, 'mom_logs', st, et) if getattr(_test, 'schedulers', None) is not None: self.__save_logs(_test.schedulers[host], _dst, 'sched_logs', st, et) if ((self.sharedpath is not None) and (self.__syncth is not None)): self.__queue.put((host, datadir, bi, sn, hostname, tn, lp)) def __save_logs(self, obj, dst, name, st, et, jid=None): if name == 'accounting': logs = obj.log_lines('accounting', n='ALL', starttime=st, endtime=et) logs = map(lambda x: x + '\n', logs) elif name == 'tracejob': logs = obj.log_lines('tracejob', id=jid, n='ALL') name += '_' + jid else: logs = obj.log_lines(obj, n='ALL', starttime=st, endtime=et) f = open(os.path.join(dst, name), 'w+') f.writelines(logs) f.close() def begin(self): if self.sharedpath is not None: self.__syncth = SyncData(self.sharedpath, self.__queue) self.__syncth.daemon = True self.__syncth.start() def addError(self, test, err): self.__save_home(test, 'ERROR') def addFailure(self, test, err): self.__save_home(test, 'FAIL') def finalize(self, result): if ((self.sharedpath is not None) and (self.__syncth 
is not None)): while not self.__queue.empty(): pass self.__syncth.stop() self.__syncth.join()
def __init__(self):
    self.du = DshUtils()
    (self.node_status, self.node_summary) = self.parse_apstat_rn()
class ProcMonitor(threading.Thread):
    """
    A background process monitoring tool
    """

    logger = logging.getLogger(__name__)
    du = DshUtils()

    def __init__(self, name=None, regexp=False, frequency=60):
        threading.Thread.__init__(self)
        self.name = name
        self.frequency = frequency
        self.regexp = regexp
        self._pu = ProcUtils()
        self.stop_thread = threading.Event()
        self.db_proc_info = []

    def set_frequency(self, value=60):
        """
        Set the frequency

        :param value: Frequency value
        :type value: int
        """
        self.logger.debug('procmonitor: set frequency to ' + str(value))
        self.frequency = value

    def get_system_stats(self, nw_protocols=None):
        """
        Run system monitoring
        """
        timenow = int(time.time())
        sysstat = {}
        # if no protocols set, use default
        if not nw_protocols:
            nw_protocols = ['TCP']
        cmd = 'sar -rSub -n %s 1 1' % ','.join(nw_protocols)
        rv = self.du.run_cmd(cmd=cmd, as_script=True)
        if rv['err']:
            return None
        op = rv['out'][2:]
        op = [i.split()[2:] for i in op
              if (i and not i.startswith('Average'))]
        sysstat['name'] = "System"
        sysstat['time'] = time.ctime(timenow)
        for i in range(0, len(op), 2):
            sysstat.update(dict(zip(op[i], op[i + 1])))
        return sysstat

    def run(self):
        """
        Run the process monitoring
        """
        while not self.stop_thread.is_set():
            self._pu.get_proc_info(name=self.name, regexp=self.regexp)
            for _p in self._pu.processes.values():
                for _per_proc in _p:
                    if bool(re.search("^((?!benchpress).)*$",
                                      _per_proc.name)):
                        _to_db = {}
                        _to_db['time'] = time.ctime(int(_per_proc.time))
                        _to_db['rss'] = _per_proc.rss
                        _to_db['vsz'] = _per_proc.vsz
                        _to_db['pcpu'] = _per_proc.pcpu
                        _to_db['pmem'] = _per_proc.pmem
                        _to_db['size'] = _per_proc.size
                        _to_db['cputime'] = _per_proc.cputime
                        _to_db['name'] = _per_proc.name
                        self.db_proc_info.append(_to_db)
            _sys_info = self.get_system_stats(nw_protocols=['TCP'])
            if _sys_info is not None:
                self.db_proc_info.append(_sys_info)
            with open('proc_monitor.json', 'a+', encoding='utf-8') as proc:
                json.dump(self.db_proc_info, proc, ensure_ascii=False,
                          indent=4)
            time.sleep(self.frequency)

    def stop(self):
        """
        Stop the process monitoring
        """
        self.stop_thread.set()
        self.join()
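# Usage sketch for ProcMonitor (the name pattern and sleep below are only
# examples): samples accumulate in db_proc_info and are also appended to
# proc_monitor.json in the current working directory.
monitor = ProcMonitor(name='.*pbs_server.*|.*pbs_sched.*', regexp=True,
                      frequency=10)
monitor.start()
time.sleep(60)          # run the workload being measured here
monitor.stop()          # sets the stop event and joins the thread
for sample in monitor.db_proc_info[:5]:
    print(sample)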
class PTLTestData(Plugin):
    """
    Save post analysis data on test cases failure or error
    """

    name = 'PTLTestData'
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warn(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warn(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0o755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' %
                str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post
            # analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0) and
                (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite has exceeded the max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        servers = getattr(_test, 'servers', None)
        if servers is not None:
            server_host = servers.values()[0].shortname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        moms = getattr(_test, 'moms', None)
        comms = getattr(_test, 'comms', None)
        client = getattr(_test.servers.values()[0], 'client', None)
        server = servers.values()[0]
        add_hosts = []
        if len(servers) > 1:
            for param in servers.values()[1:]:
                add_hosts.append(param.shortname)
        if moms is not None:
            for param in moms.values():
                add_hosts.append(param.shortname)
        if comms is not None:
            for param in comms.values():
                add_hosts.append(param.shortname)
        if client is not None:
            add_hosts.append(client.split('.')[0])
        add_hosts = list(set(add_hosts) - set([server_host]))
        pbs_snapshot_path = os.path.join(server.pbs_conf["PBS_EXEC"],
                                         "sbin", "pbs_snapshot")
        cur_user = self.du.get_current_user()
        cur_user_dir = pwd.getpwnam(cur_user).pw_dir
        cmd = [pbs_snapshot_path, '-H', server_host,
               '--daemon-logs', '2', '--accounting-logs', '2',
               '--with-sudo']
        if len(add_hosts) > 0:
            cmd += ['--additional-hosts=' + ','.join(add_hosts)]
        cmd += ['-o', cur_user_dir]
        ret = self.du.run_cmd(current_host, cmd, level=logging.DEBUG2,
                              logerr=False)
        if ret['rc'] != 0:
            _msg = 'Failed to get analysis information '
            _msg += 'on %s:' % server_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            if len(ret['out']) == 0:
                self.logger.error('Snapshot command failed')
                f.close()
                return
            snap_out = ret['out'][0]
            snap_out_dest = (snap_out.split(":")[1]).strip()
        dest = os.path.join(datadir, 'PBS_' + server_host + '.tar.gz')
        ret = self.du.run_copy(current_host, snap_out_dest, dest,
                               sudo=True, level=logging.DEBUG2)
        self.du.rm(current_host, path=snap_out_dest, recursive=True,
                   force=True, level=logging.DEBUG2)
        f.close()
        self.__save_data_count += 1
        _msg = 'Saved post analysis data'
        self.logger.info(_msg)

    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')
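# Hypothetical wiring of the PTLTestData plugin into a nose run; in practice
# pbs_benchpress performs this registration itself, so the argv, paths and
# threshold below are placeholders, not part of this module.
import nose

plugin = PTLTestData()
plugin.set_data(post_data_dir='/tmp/ptl_post_analysis',
                max_postdata_threshold=10)
nose.run(argv=['nosetests', 'tests/pbs_smoketest.py'], addplugins=[plugin])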
def __init__(self):
    self.sharedpath = None
    self.du = DshUtils()
    self.__syncth = None
    self.__queue = Queue.Queue()
class PBSTestSuite(unittest.TestCase): """ Generic ``setup``, ``teardown``, and ``logging`` functions to be used as parent class for most tests. Class instantiates: ``server object connected to localhost`` ``scheduler objected connected to localhost`` ``mom object connected to localhost`` Custom parameters: :param server: The hostname on which the PBS ``server/scheduler`` are running :param mom: The hostname on which the PBS MoM is running :param servers: Colon-separated list of hostnames hosting a PBS server. Servers are then accessible as a dictionary in the instance variable servers. :param client: For CLI mode only, name of the host on which the PBS client commands are to be run from. Format is ``<host>@<path-to-config-file>`` :param moms: Colon-separated list of hostnames hosting a PBS MoM. MoMs are made accessible as a dictionary in the instance variable moms. :param comms: Colon-separated list of hostnames hosting a PBS Comm. Comms are made accessible as a dictionary in the instance variable comms. :param nomom=<host1>\:<host2>...: expect no MoM on given set of hosts :param mode: Sets mode of operation to PBS server. Can be either ``'cli'`` or ``'api'``.Defaults to API behavior. :param conn_timeout: set a timeout in seconds after which a pbs_connect IFL call is refreshed (i.e., disconnected) :param skip-setup: Bypasses setUp of PBSTestSuite (not custom ones) :param skip-teardown: Bypasses tearDown of PBSTestSuite (not custom ones) :param procinfo: Enables process monitoring thread, logged into ptl_proc_info test metrics. The value can be set to _all_ to monitor all PBS processes,including ``pbs_server``, ``pbs_sched``, ``pbs_mom``, or a process defined by name. :param revert-to-defaults=<True|False>: if False, will not revert to defaults.True by default. :param revert-hooks=<True|False>: if False, do not revert hooks to defaults.Defaults to True. ``revert-to-defaults`` set to False overrides this setting. :param del-hooks=<True|False>: If False, do not delete hooks. Defaults to False.``revert-to-defaults`` set to False overrides this setting. :param revert-queues=<True|False>: If False, do not revert queues to defaults.Defaults to True. ``revert-to-defaults`` set to False overrides this setting. :param revert-resources=<True|False>: If False, do not revert resources to defaults. Defaults to True. ``revert-to-defaults`` set to False overrides this setting. :param del-queues=<True|False>: If False, do not delete queues. Defaults to False.``revert-to-defaults`` set to Falseoverrides this setting. :param del-vnodes=<True|False>: If False, do not delete vnodes on MoM instances.Defaults to True. :param server-revert-to-defaults=<True|False>: if False, don't revert Server to defaults :param comm-revert-to-defaults=<True|False>: if False, don't revert Comm to defaults :param mom-revert-to-defaults=<True|False>: if False, don't revert MoM to defaults :param sched-revert-to-defaults=<True|False>: if False, don't revert Scheduler to defaults :param procmon: Enables process monitoring. Multiple values must be colon separated. For example to monitor ``server``, ``sched``, and ``mom`` use ``procmon=pbs_server:pbs_sched:pbs_mom`` :param procmon-freq: Sets a polling frequency for the process monitoring tool.Defaults to 10 seconds. :param test-users: colon-separated list of users to use as test users. The users specified override the default users in the order in which they appear in the ``PBS_USERS`` list. :param default-testcase-timeout: Default test case timeout value. 
:param data-users: colon-separated list of data users. :param oper-users: colon-separated list of operator users. :param mgr-users: colon-separated list of manager users. :param root-users: colon-separated list of root users. :param build-users: colon-separated list of build users. :param clienthost: the hostnames to set in the MoM config file """ logger = logging.getLogger(__name__) metrics_data = {} conf = {} param = None du = DshUtils() _procmon = None _process_monitoring = False revert_to_defaults = True server_revert_to_defaults = True mom_revert_to_defaults = True sched_revert_to_defaults = True revert_queues = True revert_resources = True revert_hooks = True del_hooks = True del_queues = True del_vnodes = True server = None scheduler = None mom = None comm = None servers = None schedulers = None moms = None comms = None @classmethod def setUpClass(cls): cls.log_enter_setup(True) cls._testMethodName = 'setUpClass' cls.parse_param() cls.init_param() cls.check_users_exist() cls.init_servers() cls.init_comms() cls.init_schedulers() cls.init_moms() cls.log_end_setup(True) def setUp(self): if 'skip-setup' in self.conf: return self.log_enter_setup() self.init_proc_mon() self.revert_servers() self.revert_comms() self.revert_schedulers() self.revert_moms() self.log_end_setup() @classmethod def log_enter_setup(cls, iscls=False): _m = ' Entered ' + cls.__name__ + ' setUp' if iscls: _m += 'Class' _m_len = len(_m) cls.logger.info('=' * _m_len) cls.logger.info(_m) cls.logger.info('=' * _m_len) @classmethod def log_end_setup(cls, iscls=False): _m = 'Completed ' + cls.__name__ + ' setUp' if iscls: _m += 'Class' _m_len = len(_m) cls.logger.info('=' * _m_len) cls.logger.info(_m) cls.logger.info('=' * _m_len) @classmethod def _validate_param(cls, pname): """ Check if parameter was enabled at the ``command-line`` :param pname: parameter name :type pname: str :param pvar: class variable to set according to command-line setting """ if pname not in cls.conf: return if cls.conf[pname] in PTL_TRUE: setattr(cls, pname.replace('-', '_'), True) else: setattr(cls, pname.replace('-', '_'), False) @classmethod def _set_user(cls, name, user_list): if name in cls.conf: for idx, u in enumerate(cls.conf[name].split(':')): user_list[idx].__init__(u) @classmethod def check_users_exist(cls): """ Check whether the user is exist or not """ testusersexist = True for u in [TEST_USER, TEST_USER1, TEST_USER2, TEST_USER3]: rv = cls.du.check_user_exists(str(u)) if not rv: _msg = 'User ' + str(u) + ' does not exist!' raise setUpClassError(_msg) return testusersexist @classmethod def kicksched_action(cls, server, obj_type, *args, **kwargs): """ custom scheduler action to kick a scheduling cycle when expectig a job state change """ if server is None: cls.logger.error('no server defined for custom action') return if obj_type == JOB: if (('scheduling' in server.attributes) and (server.attributes['scheduling'] != 'False')): server.manager(MGR_CMD_SET, MGR_OBJ_SERVER, {'scheduling': 'True'}, level=logging.DEBUG) @classmethod def parse_param(cls): """ get test configuration parameters as a ``comma-separated`` list of attributes. Attributes may be ``'='`` separated key value pairs or standalone entries. ``Multi-property`` attributes are colon-delimited. 
""" if cls.param is None: return for h in cls.param.split(','): if '=' in h: k, v = h.split('=') cls.conf[k.strip()] = v.strip() else: cls.conf[h.strip()] = '' if (('clienthost' in cls.conf) and not isinstance(cls.conf['clienthost'], list)): cls.conf['clienthost'] = cls.conf['clienthost'].split(':') users_map = [('test-users', PBS_USERS), ('oper-users', PBS_OPER_USERS), ('mgr-users', PBS_MGR_USERS), ('data-users', PBS_DATA_USERS), ('root-users', PBS_ROOT_USERS), ('build-users', PBS_BUILD_USERS)] for k, v in users_map: cls._set_user(k, v) @classmethod def init_param(cls): cls._validate_param('revert-to-defaults') cls._validate_param('server-revert-to-defaults') cls._validate_param('comm-revert-to-defaults') cls._validate_param('mom-revert-to-defaults') cls._validate_param('sched-revert-to-defaults') cls._validate_param('del-hooks') cls._validate_param('revert-hooks') cls._validate_param('del-queues') cls._validate_param('del-vnodes') cls._validate_param('revert-queues') cls._validate_param('revert-resources') if 'default-testcase-timeout' not in cls.conf.keys(): cls.conf['default_testcase_timeout'] = 180 else: cls.conf['default_testcase_timeout'] = int( cls.conf['default-testcase-timeout']) @classmethod def is_server_licensed(cls, server): """ Check if server is licensed or not """ for i in range(0, 10, 1): lic = server.status(SERVER, 'license_count', level=logging.INFOCLI) if lic and 'license_count' in lic[0]: lic = PbsTypeLicenseCount(lic[0]['license_count']) if int(lic['Avail_Sockets']) > 0: return True elif int(lic['Avail_Global']) > 0: return True elif int(lic['Avail_Local']) > 0: return True time.sleep(i) return False @classmethod def init_from_conf(cls, conf, single=None, multiple=None, skip=None, func=None): """ Helper method to parse test parameters for`` mom/server/scheduler`` instances. 
The supported format of each service request is: ``hostname@configuration/path`` For example: ``pbs_benchpress -p server=remote@/etc/pbs.conf.12.0`` initializes a remote server instance that is configured according to the remote file ``/etc/pbs.conf.12.0`` """ endpoints = [] if ((multiple in conf) and (conf[multiple] is not None)): __objs = conf[multiple].split(':') for _m in __objs: tmp = _m.split('@') if len(tmp) == 2: endpoints.append(tuple(tmp)) elif len(tmp) == 1: endpoints.append((tmp[0], None)) elif ((single in conf) and (conf[single] is not None)): tmp = conf[single].split('@') if len(tmp) == 2: endpoints.append(tuple(tmp)) elif len(tmp) == 1: endpoints.append((tmp[0], None)) else: endpoints = [(socket.gethostname(), None)] objs = PBSServiceInstanceWrapper() for name, objconf in endpoints: if ((skip is not None) and (skip in conf) and ((name in conf[skip]) or (conf[skip] in name))): continue if objconf is not None: n = name + '@' + objconf else: n = name if getattr(cls, "server", None) is not None: objs[n] = func(name, pbsconf_file=objconf, server=cls.server.hostname) else: objs[n] = func(name, pbsconf_file=objconf) if objs[n] is None: _msg = 'Failed %s(%s, %s)' % (func.__name__, name, objconf) raise setUpClassError(_msg) return objs @classmethod def init_servers(cls, init_server_func=None, skip=None): """ Initialize servers """ if init_server_func is None: init_server_func = cls.init_server if 'servers' in cls.conf: if 'comms' not in cls.conf: cls.conf['comms'] = cls.conf['servers'] if 'schedulers' not in cls.conf: cls.conf['schedulers'] = cls.conf['servers'] if 'moms' not in cls.conf: cls.conf['moms'] = cls.conf['servers'] if 'server' in cls.conf: if 'comm' not in cls.conf: cls.conf['comm'] = cls.conf['server'] if 'scheduler' not in cls.conf: cls.conf['scheduler'] = cls.conf['server'] if 'mom' not in cls.conf: cls.conf['mom'] = cls.conf['server'] cls.servers = cls.init_from_conf(conf=cls.conf, single='server', multiple='servers', skip=skip, func=init_server_func) if cls.servers: cls.server = cls.servers.values()[0] @classmethod def init_comms(cls, init_comm_func=None, skip=None): """ Initialize comms """ if init_comm_func is None: init_comm_func = cls.init_comm cls.comms = cls.init_from_conf(conf=cls.conf, single='comm', multiple='comms', skip=skip, func=init_comm_func) if cls.comms: cls.comm = cls.comms.values()[0] @classmethod def init_schedulers(cls, init_sched_func=None, skip=None): """ Initialize schedulers """ if init_sched_func is None: init_sched_func = cls.init_scheduler cls.schedulers = cls.init_from_conf(conf=cls.conf, single='scheduler', multiple='schedulers', skip=skip, func=init_sched_func) if cls.schedulers: cls.scheduler = cls.schedulers.values()[0] @classmethod def init_moms(cls, init_mom_func=None, skip='nomom'): """ Initialize moms """ if init_mom_func is None: init_mom_func = cls.init_mom cls.moms = cls.init_from_conf(conf=cls.conf, single='mom', multiple='moms', skip=skip, func=init_mom_func) if cls.moms: cls.mom = cls.moms.values()[0] @classmethod def init_server(cls, hostname, pbsconf_file=None): """ Initialize a server instance Define custom expect action to trigger a scheduling cycle when job is not in running state :returns: The server instance on success and None on failure """ client = hostname client_conf = None if 'client' in cls.conf: _cl = cls.conf['client'].split('@') client = _cl[0] if len(_cl) > 1: client_conf = _cl[1] server = Server(hostname, pbsconf_file=pbsconf_file, client=client, client_pbsconf_file=client_conf) server._conn_timeout = 0 if 
cls.conf is not None: if 'mode' in cls.conf: if cls.conf['mode'] == 'cli': server.set_op_mode(PTL_CLI) if 'conn_timeout' in cls.conf: conn_timeout = int(cls.conf['conn_timeout']) server.set_connect_timeout(conn_timeout) sched_action = ExpectAction('kicksched', True, JOB, cls.kicksched_action) server.add_expect_action(action=sched_action) return server @classmethod def init_comm(cls, hostname, pbsconf_file=None, server=None): """ Initialize a Comm instance associated to the given hostname. This method must be called after init_server :param hostname: The host on which the Comm is running :type hostname: str :param pbsconf_file: Optional path to an alternate pbs config file :type pbsconf_file: str or None :returns: The instantiated Comm upon success and None on failure. :param server: The server name associated to the Comm :type server: str Return the instantiated Comm upon success and None on failure. """ try: server = cls.servers[server] except: server = None return Comm(hostname, pbsconf_file=pbsconf_file, server=server) @classmethod def init_scheduler(cls, hostname, pbsconf_file=None, server=None): """ Initialize a Scheduler instance associated to the given server. This method must be called after ``init_server`` :param server: The server name associated to the scheduler :type server: str :param pbsconf_file: Optional path to an alternate config file :type pbsconf_file: str or None :param hostname: The host on which Sched is running :type hostname: str :returns: The instantiated scheduler upon success and None on failure """ try: server = cls.servers[server] except: server = None return Scheduler(hostname=hostname, server=server, pbsconf_file=pbsconf_file) @classmethod def init_mom(cls, hostname, pbsconf_file=None, server=None): """ Initialize a ``MoM`` instance associated to the given hostname. This method must be called after ``init_server`` :param hostname: The host on which the MoM is running :type hostname: str :param pbsconf_file: Optional path to an alternate pbs config file :type pbsconf_file: str or None :returns: The instantiated MoM upon success and None on failure. 
""" try: server = cls.servers[server] except: server = None return MoM(hostname, pbsconf_file=pbsconf_file, server=server) def init_proc_mon(self): """ Initialize process monitoring when requested """ if 'procmon' in self.conf: _proc_mon = [] for p in self.conf['procmon'].split(':'): _proc_mon += ['.*' + p + '.*'] if _proc_mon: if 'procmon-freq' in self.conf: freq = int(self.conf['procmon-freq']) else: freq = 10 self.start_proc_monitor(name='|'.join(_proc_mon), regexp=True, frequency=freq) self._process_monitoring = True def revert_servers(self, force=False): """ Revert the values set for servers """ for server in self.servers.values(): self.revert_server(server, force) def revert_comms(self, force=False): """ Revert the values set for comms """ for comm in self.comms.values(): self.revert_comm(comm, force) def revert_schedulers(self, force=False): """ Revert the values set for schedulers """ for sched in self.schedulers.values(): self.revert_scheduler(sched, force) def revert_moms(self, force=False): """ Revert the values set for moms """ self.del_all_nodes = True for mom in self.moms.values(): self.revert_mom(mom, force) def revert_server(self, server, force=False): """ Revert the values set for server """ rv = server.isUp() if not rv: self.logger.error('server ' + server.hostname + ' is down') server.start() msg = 'Failed to restart server ' + server.hostname self.assertTrue(server.isUp(), msg) server_stat = server.status(SERVER)[0] current_user = pwd.getpwuid(os.getuid())[0] try: # remove current user's entry from managers list (if any) a = {ATTR_managers: (DECR, current_user + '@*')} server.manager(MGR_CMD_SET, SERVER, a, sudo=True) except: pass a = {ATTR_managers: (INCR, current_user + '@*')} server.manager(MGR_CMD_SET, SERVER, a, sudo=True) if ((self.revert_to_defaults and self.server_revert_to_defaults) or force): server.revert_to_defaults(reverthooks=self.revert_hooks, delhooks=self.del_hooks, revertqueues=self.revert_queues, delqueues=self.del_queues, revertresources=self.revert_resources, server_stat=server_stat) rv = self.is_server_licensed(server) _msg = 'No license found on server %s' % (server.shortname) self.assertTrue(rv, _msg) self.logger.info('server: %s licensed', server.hostname) def revert_comm(self, comm, force=False): """ Revert the values set for comm """ rv = comm.isUp() if not rv: self.logger.error('comm ' + comm.hostname + ' is down') comm.start() msg = 'Failed to restart comm ' + comm.hostname self.assertTrue(comm.isUp(), msg) def revert_scheduler(self, scheduler, force=False): """ Revert the values set for scheduler """ rv = scheduler.isUp() if not rv: self.logger.error('scheduler ' + scheduler.hostname + ' is down') scheduler.start() msg = 'Failed to restart scheduler ' + scheduler.hostname self.assertTrue(scheduler.isUp(), msg) if ((self.revert_to_defaults and self.sched_revert_to_defaults) or force): rv = scheduler.revert_to_defaults() _msg = 'Failed to revert sched %s' % (scheduler.hostname) self.assertTrue(rv, _msg) def revert_mom(self, mom, force=False): """ Revert the values set for mom :param mom: the MoM object whose values are to be reverted :type mom: MoM object :param force: Option to reverse forcibly :type force: bool """ rv = mom.isUp() if not rv: self.logger.error('mom ' + mom.hostname + ' is down') mom.start() msg = 'Failed to restart mom ' + mom.hostname self.assertTrue(mom.isUp(), msg) mom.pbs_version() if ((self.revert_to_defaults and self.mom_revert_to_defaults) or force): rv = mom.revert_to_defaults(delvnodedefs=self.del_vnodes) _msg = 
'Failed to revert mom %s' % (mom.hostname) self.assertTrue(rv, _msg) if 'clienthost' in self.conf: mom.add_config({'$clienthost': self.conf['clienthost']}) a = {'state': 'free', 'resources_available.ncpus': (GE, 1)} nodes = self.server.counter(NODE, a, attrop=PTL_AND, level=logging.DEBUG) if not nodes: try: self.server.manager(MGR_CMD_DELETE, NODE, None, '') except: pass mom.delete_vnode_defs() mom.delete_vnodes() mom.restart() self.logger.info('server: no nodes defined, creating one') self.server.manager(MGR_CMD_CREATE, NODE, None, mom.shortname) name = mom.shortname if mom.platform == 'cray' or mom.platform == 'craysim': # delete all nodes(@default) on first call of revert_mom # and create all nodes specified by self.moms one by one try: if self.del_all_nodes: self.server.manager(MGR_CMD_DELETE, NODE, None, '') self.del_all_nodes = False except: pass self.server.manager(MGR_CMD_CREATE, NODE, None, name) else: try: self.server.status(NODE, id=name) except PbsStatusError: # server doesn't have node with shortname # check with hostname name = mom.hostname try: self.server.status(NODE, id=name) except PbsStatusError: # server doesn't have node for this mom yet # so create with shortname name = mom.shortname self.server.manager(MGR_CMD_CREATE, NODE, None, mom.shortname) self.server.expect(NODE, {ATTR_NODE_state: 'free'}, id=name, interval=1) return mom def analyze_logs(self): """ analyze accounting and scheduler logs from time test was started until it finished """ pla = PBSLogAnalyzer() self.metrics_data = pla.analyze_logs(serverlog=self.server.logfile, schedlog=self.scheduler.logfile, momlog=self.mom.logfile, acctlog=self.server.acctlogfile, start=self.server.ctime, end=int(time.time())) def start_proc_monitor(self, name=None, regexp=False, frequency=60): """ Start the process monitoring :param name: Process name :type name: str or None :param regexp: Regular expression to match :type regexp: bool :param frequency: Frequency of monitoring :type frequency: int """ if self._procmon is not None: self.logger.info('A process monitor is already instantiated') return self.logger.info('starting process monitoring of ' + name + ' every ' + str(frequency) + 'seconds') self._procmon = ProcMonitor(name=name, regexp=regexp, frequency=frequency) self._procmon.start() def stop_proc_monitor(self): """ Stop the process monitoring """ if not self._process_monitoring: return self.logger.info('stopping process monitoring') self._procmon.stop() self.metrics_data['procs'] = self._procmon.db_proc_info self._process_monitoring = False def skipTest(self, reason=None): """ Skip Test :param reason: message to indicate why test is skipped :type reason: str or None """ if reason: self.logger.warning('test skipped: ' + reason) else: reason = 'unknown' raise SkipTest(reason) skip_test = skipTest @classmethod def log_enter_teardown(cls, iscls=False): _m = ' Entered ' + cls.__name__ + ' tearDown' if iscls: _m += 'Class' _m_len = len(_m) cls.logger.info('=' * _m_len) cls.logger.info(_m) cls.logger.info('=' * _m_len) @classmethod def log_end_teardown(cls, iscls=False): _m = 'Completed ' + cls.__name__ + ' tearDown' if iscls: _m += 'Class' _m_len = len(_m) cls.logger.info('=' * _m_len) cls.logger.info(_m) cls.logger.info('=' * _m_len) def tearDown(self): """ verify that ``server`` and ``scheduler`` are up clean up jobs and reservations """ if 'skip-teardown' in self.conf: return self.log_enter_teardown() self.stop_proc_monitor() self.log_end_teardown() @classmethod def tearDownClass(cls): cls._testMethodName = 'tearDownClass'
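# Minimal test-case sketch built on PBSTestSuite; the job attributes and the
# expected state are illustrative only. Such a test would typically be run
# with something like: pbs_benchpress -t MinimalSmokeTest -p moms=<h1>:<h2>
class MinimalSmokeTest(PBSTestSuite):

    def test_submit_and_run(self):
        # setUp() has already reverted server/scheduler/mom to defaults
        j = Job(TEST_USER, attrs={'Resource_List.select': '1:ncpus=1'})
        jid = self.server.submit(j)
        # poll until the job reaches the running state
        self.server.expect(JOB, {'job_state': 'R'}, id=jid)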
def __init__(self, command):
    self.__du = DshUtils()
    self.__cmd = command
class PTLTestData(Plugin): """ Save post analysis data on test cases failure or error """ name = 'PTLTestData' score = sys.maxint - 6 logger = logging.getLogger(__name__) def __init__(self): Plugin.__init__(self) self.post_data_dir = None self.max_postdata_threshold = None self.__save_data_count = 0 self.__priv_sn = '' self.du = DshUtils() def options(self, parser, env): """ Register command line options """ pass def set_data(self, post_data_dir, max_postdata_threshold): self.post_data_dir = post_data_dir self.max_postdata_threshold = max_postdata_threshold def configure(self, options, config): """ Configure the plugin and system, based on selected options """ self.config = config if self.post_data_dir is not None: self.enabled = True else: self.enabled = False def __save_home(self, test, status, err=None): if hasattr(test, 'test'): _test = test.test sn = _test.__class__.__name__ elif hasattr(test, 'context'): _test = test.context sn = _test.__name__ else: # test does not have any PBS Objects, so just return return if self.__priv_sn != sn: self.__save_data_count = 0 self.__priv_sn = sn # Saving home might take time so disable timeout # handler set by runner tn = getattr(_test, '_testMethodName', 'unknown') testlogs = getattr(test, 'captured_logs', '') datadir = os.path.join(self.post_data_dir, sn, tn) if os.path.exists(datadir): _msg = 'Old post analysis data exists at %s' % datadir _msg += ', skipping saving data for this test case' self.logger.warn(_msg) _msg = 'Please remove old directory or' _msg += ' provide different directory' self.logger.warn(_msg) return if getattr(test, 'old_sigalrm_handler', None) is not None: _h = getattr(test, 'old_sigalrm_handler') signal.signal(signal.SIGALRM, _h) signal.alarm(0) self.logger.log(logging.DEBUG2, 'Saving post analysis data...') current_host = socket.gethostname().split('.')[0] self.du.mkdir(current_host, path=datadir, mode=0755, parents=True, logerr=False, level=logging.DEBUG2) if err is not None: if isclass(err[0]) and issubclass(err[0], SkipTest): status = 'SKIP' status_data = 'Reason = %s' % (err[1]) else: if isclass(err[0]) and issubclass(err[0], TimeOut): status = 'TIMEDOUT' status_data = getattr(test, 'err_in_string', '') else: status_data = '' logfile = os.path.join(datadir, 'logfile_' + status) f = open(logfile, 'w+') f.write(testlogs + '\n') f.write(status_data + '\n') f.write('test duration: %s\n' % str(getattr(test, 'duration', '0'))) if status in ('PASS', 'SKIP'): # Test case passed or skipped, no need to save post analysis data f.close() return if ((self.max_postdata_threshold != 0) and (self.__save_data_count >= self.max_postdata_threshold)): _msg = 'Total number of saved post analysis data for this' _msg += ' testsuite is exceeded max postdata threshold' _msg += ' (%d)' % self.max_postdata_threshold f.write(_msg + '\n') self.logger.error(_msg) f.close() return svr = getattr(_test, 'server', None) if svr is not None: svr_host = svr.hostname else: _msg = 'Could not find Server Object in given test object' _msg += ', skipping saving post analysis data' f.write(_msg + '\n') self.logger.warning(_msg) f.close() return pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'], 'unsupported', 'pbs_diag') cur_user = self.du.get_current_user() cmd = [pbs_diag, '-f', '-d', '2'] cmd += ['-u', cur_user] cmd += ['-o', pwd.getpwnam(cur_user).pw_dir] if len(svr.jobs) > 0: cmd += ['-j', ','.join(svr.jobs.keys())] ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2) if ret['rc'] != 0: _msg = 'Failed to get diag information for ' _msg += 
'on %s:' % svr_host _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n' f.write(_msg + '\n') self.logger.error(_msg) f.close() return else: diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*" m = re.search(diag_re, '\n'.join(ret['out'])) if m is not None: diag_out = m.group('path') else: _msg = 'Failed to find generated diag path in below output:' _msg += '\n\n' + '-' * 80 + '\n' _msg += '\n'.join(ret['out']) + '\n' _msg += '-' * 80 + '\n\n' f.write(_msg) self.logger.error(_msg) f.close() return diag_out_dest = os.path.join(datadir, os.path.basename(diag_out)) if not self.du.is_localhost(svr_host): diag_out_r = svr_host + ':' + diag_out else: diag_out_r = diag_out ret = self.du.run_copy(current_host, diag_out_r, diag_out_dest, sudo=True, level=logging.DEBUG2) if ret['rc'] != 0: _msg = 'Failed to copy generated diag from' _msg += ' %s to %s' % (diag_out_r, diag_out_dest) f.write(_msg + '\n') self.logger.error(_msg) f.close() return else: self.du.rm(svr_host, path=diag_out, sudo=True, force=True, level=logging.DEBUG2) cores = [] dir_list = ['server_priv', 'sched_priv', 'mom_priv'] for d in dir_list: path = os.path.join(svr.pbs_conf['PBS_HOME'], d) files = self.du.listdir(hostname=svr_host, path=path, sudo=True, level=logging.DEBUG2) for _f in files: if os.path.basename(_f).startswith('core'): cores.append(_f) cores = list(set(cores)) if len(cores) > 0: cmd = ['gunzip', diag_out_dest] ret = self.du.run_cmd(current_host, cmd, sudo=True, level=logging.DEBUG2) if ret['rc'] != 0: _msg = 'Failed unzip generated diag at %s:' % diag_out_dest _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n' f.write(_msg + '\n') self.logger.error(_msg) f.close() return diag_out_dest = diag_out_dest.rstrip('.gz') cmd = ['tar', '-xf', diag_out_dest, '-C', datadir] ret = self.du.run_cmd(current_host, cmd, sudo=True, level=logging.DEBUG2) if ret['rc'] != 0: _msg = 'Failed extract generated diag %s' % diag_out_dest _msg += ' to %s:' % datadir _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n' f.write(_msg + '\n') self.logger.error(_msg) f.close() return self.du.rm(hostname=current_host, path=diag_out_dest, force=True, sudo=True, level=logging.DEBUG2) diag_out_dest = diag_out_dest.rstrip('.tar') for c in cores: cmd = [pbs_diag, '-g', c] ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2) if ret['rc'] != 0: _msg = 'Failed to get core file information for ' _msg += '%s on %s:' % (c, svr_host) _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n' f.write(_msg + '\n') self.logger.error(_msg) else: of = os.path.join(diag_out_dest, os.path.basename(c) + '.out') _f = open(of, 'w+') _f.write('\n'.join(ret['out']) + '\n') _f.close() self.du.rm(hostname=svr_host, path=c, force=True, sudo=True, level=logging.DEBUG2) cmd = ['tar', '-cf', diag_out_dest + '.tar'] cmd += [os.path.basename(diag_out_dest)] ret = self.du.run_cmd(current_host, cmd, sudo=True, cwd=datadir, level=logging.DEBUG2) if ret['rc'] != 0: _msg = 'Failed generate tarball of diag directory' _msg += ' %s' % diag_out_dest _msg += ' after adding core(s) information in it:' _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n' f.write(_msg + '\n') self.logger.error(_msg) f.close() return cmd = ['gzip', diag_out_dest + '.tar'] ret = self.du.run_cmd(current_host, cmd, sudo=True, level=logging.DEBUG2) if ret['rc'] != 0: _msg = 'Failed compress tarball of diag %s' % diag_out_dest _msg += '.tar after adding core(s) information in it:' _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n' f.write(_msg + '\n') self.logger.error(_msg) f.close() return self.du.rm(current_host, 
diag_out_dest, sudo=True, recursive=True, force=True, level=logging.DEBUG2) else: diag_out_dest = diag_out_dest.rstrip('.tar.gz') dest = os.path.join(datadir, 'PBS_' + current_host.split('.')[0] + '.tar.gz') ret = self.du.run_copy(current_host, diag_out_dest + '.tar.gz', dest, sudo=True, level=logging.DEBUG2) if ret['rc'] != 0: _msg = 'Failed rename tarball of diag from %s' % diag_out_dest _msg += '.tar.gz to %s:' % dest _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n' f.write(_msg + '\n') self.logger.error(_msg) f.close() return self.du.rm(current_host, path=diag_out_dest + '.tar.gz', force=True, sudo=True, level=logging.DEBUG2) f.close() self.__save_data_count += 1 _msg = 'Successfully saved post analysis data' self.logger.log(logging.DEBUG2, _msg) def addError(self, test, err): self.__save_home(test, 'ERROR', err) def addFailure(self, test, err): self.__save_home(test, 'FAIL', err) def addSuccess(self, test): self.__save_home(test, 'PASS')
class LcovUtils(object): """ Coverage Utils :param cov_bin: Coverage binary :param html_bin: Coverage html binary :param cov_out: Coverage output directory :type cov_out: str or None :param data_dir: Coverage data directory :type data_dir: str or None :param html_nosrc: HTML reports without PBS source :type stml_nosrc: bool :param html_baseurl: HTML base url :type html_baseurl: str or None """ du = DshUtils() logger = logging.getLogger(__name__) def __init__(self, cov_bin=None, html_bin=None, cov_out=None, data_dir=None, html_nosrc=False, html_baseurl=None): self.set_coverage_data_dir(data_dir) self.set_coverage_bin(cov_bin) self.set_genhtml_bin(html_bin) self.set_coverage_out(cov_out) self.set_html_nosource(html_nosrc) self.set_html_baseurl(html_baseurl) self.coverage_traces = [] def set_html_baseurl(self, baseurl): """ Set ``HTML`` base url """ self.logger.info('coverage baseurl set to ' + str(baseurl)) self.html_baseurl = baseurl def set_html_nosource(self, nosource=False): """ Set HTML nosource parameter to False. """ self.logger.info('coverage no-source set to ' + str(nosource)) self.html_nosrc = nosource def set_coverage_bin(self, cov_bin=None): """ Set the coverage binary :param cov_binary: Coverage bin to set """ if cov_bin is None: cov_bin = 'lcov' rv = CliUtils.check_bin(cov_bin) if not rv: self.logger.error('None lcov_bin defined!') sys.exit(1) else: self.logger.info('coverage utility set to ' + cov_bin) self.cov_bin = cov_bin return rv def set_genhtml_bin(self, html_bin=None): """ Set HTML generation utility. :param html_bin: HTML bin to set """ if html_bin is None: html_bin = 'genhtml' rv = CliUtils.check_bin(html_bin) if not rv: self.logger.error('%s tool not found' % (html_bin)) self.html_bin = None else: self.logger.info('HTML generation utility set to ' + html_bin) self.html_bin = html_bin return rv def set_coverage_out(self, cov_out=None): """ Set the coverage output directory :param cov_out: Coverage output directory path. 
""" if cov_out is None: d = 'pbscov-' + time.strftime('%Y%m%d_%H%M%S', time.localtime()) cov_out = os.path.join(tempfile.gettempdir(), d) if not os.path.isdir(cov_out): os.mkdir(cov_out) self.logger.info('coverage output directory set to ' + cov_out) self.cov_out = cov_out def set_coverage_data_dir(self, data=None): """ Set the coverage data directory :param data: Data directory path :returns: True if file name ends with .gcno else return False """ self.data_dir = data if self.data_dir is not None: walker = os.walk(self.data_dir) for _, _, files in walker: for f in files: if f.endswith('.gcno'): return True return False def add_trace(self, trace): """ Add coverage trace :param trace: Coverage trace """ if trace not in self.coverage_traces: self.logger.info('Adding coverage trace: %s' % (trace)) self.coverage_traces.append(trace) def create_coverage_data_files(self, path): """ Create .gcda counterpart files for every .gcno file and give it read/write permissions """ walker = os.walk(path) for root, _, files in walker: for f in files: if f.endswith('.gcda'): pf = os.path.join(root, f) s = os.stat(pf) if (s.st_mode & S_IWOTH) == 0: self.du.run_cmd(cmd=['chmod', '666', pf], level=logging.DEBUG, sudo=True) elif f.endswith('.gcno'): nf = f.replace('.gcno', '.gcda') pf = os.path.join(root, nf) if not os.path.isfile(pf): self.du.run_cmd(cmd=['touch', pf], level=logging.DEBUG, sudo=True) self.du.run_cmd(cmd=['chmod', '666', pf], level=logging.DEBUG, sudo=True) def initialize_coverage(self, out=None, name=None): """ Initialize coverage :param out: Output path :type out: str or None :param name: name of the command :type name: str or None """ if self.data_dir is not None: if out is None: out = os.path.join(self.cov_out, 'baseline.info') self.logger.info('Initializing coverage data to ' + out) self.create_coverage_data_files(self.data_dir) cmd = [self.cov_bin] if name is not None: cmd += ['-t', name] cmd += ['-i', '-d', self.data_dir, '-c', '-o', out] self.du.run_cmd(cmd=cmd, logerr=False) self.add_trace(out) def capture_coverage(self, out=None, name=None): """ Capture the coverage parameters """ if self.data_dir is not None: if out is None: out = os.path.join(self.cov_out, 'tests.info') self.logger.info('Capturing coverage data to ' + out) cmd = [self.cov_bin] if name is not None: cmd += ['-t', name] cmd += ['-c', '-d', self.data_dir, '-o', out] self.du.run_cmd(cmd=cmd, logerr=False) self.add_trace(out) def zero_coverage(self): """ Zero the data counters. 
Note that a process would need to be restarted in order to collect data again, running ``--initialize`` will not get populate the data counters """ if self.data_dir is not None: self.logger.info('Resetting coverage data') cmd = [self.cov_bin, '-z', '-d', self.data_dir] self.du.run_cmd(cmd=cmd, logerr=False) def merge_coverage_traces(self, out=None, name=None, exclude=None): """ Merge the coverage traces """ if not self.coverage_traces: return if out is None: out = os.path.join(self.cov_out, 'total.info') self.logger.info('Merging coverage traces to ' + out) if exclude is not None: tmpout = out + '.tmp' else: tmpout = out cmd = [self.cov_bin] if name is not None: cmd += ['-t', name] for t in self.coverage_traces: cmd += ['-a', t] cmd += ['-o', tmpout] self.du.run_cmd(cmd=cmd, logerr=False) if exclude is not None: cmd = [self.cov_bin] if name is not None: cmd += ['-t', name] cmd += ['-r', tmpout] + exclude + ['-o', out] self.du.run_cmd(cmd=cmd, logerr=False) self.du.rm(path=tmpout, logerr=False) def generate_html(self, out=None, html_out=None, html_nosrc=False): """ Generate the ``HTML`` report """ if self.html_bin is None: self.logger.warn('No genhtml bin is defined') return if out is None: out = os.path.join(self.cov_out, 'total.info') if not os.path.isfile(out): return if html_out is None: html_out = os.path.join(self.cov_out, 'html') if (self.html_nosrc or html_nosrc): self.logger.info('Generating HTML reports (without PBS source)' ' from coverage data') cmd = [self.html_bin, '--no-source', out] cmd += ['-o', html_out] self.du.run_cmd(cmd=cmd, logerr=False) else: self.logger.info('Generating HTML reports (with PBS Source) from' ' coverage data') cmd = [self.html_bin, out, '-o', html_out] self.du.run_cmd(cmd=cmd, logerr=False) def change_baseurl(self, html_out=None, html_baseurl=None): """ Change the ``HTML`` base url """ if html_baseurl is None: html_baseurl = self.html_baseurl if html_baseurl is None: return if html_out is None: html_out = os.path.join(self.cov_out, 'html') if not os.path.isdir(html_out): return html_out_bu = os.path.join(os.path.dirname(html_out), os.path.basename(html_out) + '_baseurl') if html_baseurl[-1] != '/': html_baseurl += '/' self.logger.info('Changing baseurl to %s' % (html_baseurl)) self.du.run_copy(src=html_out, dest=html_out_bu, recursive=True) for root, _, files in os.walk(html_out_bu): newroot = root.split(html_out_bu)[1] if ((len(newroot) > 0) and (newroot[0] == '/')): newroot = newroot[1:] newroot = urljoin(html_baseurl, newroot) if newroot[-1] != '/': newroot += '/' print root, newroot for f in files: if not f.endswith('.html'): continue f = os.path.join(root, f) fd = open(f, 'r') line = ''.join(fd.readlines()) fd.close() tree = BeautifulSoup(line) for a in tree.findAll('a'): href = a['href'] if href.startswith('http://'): continue a['href'] = urljoin(newroot, href) for img in tree.findAll('img'): img['src'] = urljoin(newroot, img['src']) for css in tree.findAll('link', rel='stylesheet'): css['href'] = urljoin(newroot, css['href']) fd = open(f, 'w+') fd.write(str(tree)) fd.close() def summarize_coverage(self, out=None): """ Summarize the coverage output """ if out is None: out = os.path.join(self.cov_out, 'total.info') if not os.path.isfile(out): return '' self.logger.info('Summarizing coverage data from ' + out) cmd = [self.cov_bin, '--summary', out] return self.du.run_cmd(cmd=cmd, logerr=False)['err']
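# Sketch of a coverage pass with LcovUtils; it assumes the PBS binaries were
# built with gcov instrumentation, and the directories below are
# placeholders for the real build tree and output location.
cov = LcovUtils(data_dir='/opt/pbs/build/src', cov_out='/tmp/pbscov',
                html_nosrc=True)
cov.zero_coverage()                    # reset .gcda counters before the run
cov.initialize_coverage(name='ptl')    # record the baseline trace
# ... restart the PBS daemons and run the test suite here ...
cov.capture_coverage(name='ptl')       # capture post-run counters
cov.merge_coverage_traces(name='ptl')  # baseline + tests -> total.info
cov.generate_html()
print(cov.summarize_coverage())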
class ProcUtils(object): """ Utilities to query process information """ logger = logging.getLogger(__name__) du = DshUtils() platform = sys.platform def __init__(self): self.processes = {} self.__h2ps = {} def get_ps_cmd(self, hostname=None): """ Get the ps command :param hostname: hostname of the machine :type hostname: str or None """ if hostname is None: hostname = socket.gethostname() if hostname in self.__h2ps: return self.__h2ps[hostname] if not self.du.is_localhost(hostname): platform = self.du.get_platform(hostname) else: platform = self.platform # set some platform-specific arguments to ps ps_arg = '-C' if platform.startswith('aix'): ps_cmd = ['ps', '-o', 'pid,rssize,vsz,pcpu,comm'] ps_arg = '-n' elif platform.startswith('hp-ux'): ps_cmd = ['ps', '-o', 'pid,sz,vsz,pcpu,comm'] elif platform.startswith('sun'): ps_cmd = ['ps', '-o', 'pid,rss,vsz,pcpu,comm'] ps_arg = '-n' else: ps_cmd = ['ps', '-o', 'pid,rss,vsz,pcpu,command'] self.__h2ps[hostname] = (ps_cmd, ps_arg) return (ps_cmd, ps_arg) def _init_processes(self): self.processes = {} def _get_proc_info_unix(self, hostname=None, name=None, pid=None, regexp=False): """ Helper function to ``get_proc_info`` for Unix only system """ (ps_cmd, ps_arg) = self.get_ps_cmd(hostname) if name is not None: if not regexp: cr = self.du.run_cmd(hostname, (ps_cmd + [ps_arg, name]), level=logging.DEBUG2) else: cr = self.du.run_cmd(hostname, ps_cmd + ['-e'], level=logging.DEBUG2) elif pid is not None: cr = self.du.run_cmd(hostname, ps_cmd + ['-p', pid], level=logging.DEBUG2) else: return if cr['rc'] == 0 and cr['out']: for proc in cr['out']: _pi = None try: _s = proc.split() p = _s[0] rss = _s[1] vsz = _s[2] pcpu = _s[3] command = " ".join(_s[4:]) except: continue if ((pid is not None and p == str(pid)) or (name is not None and ( (regexp and re.search(name, command) is not None) or (not regexp and name in command)))): _pi = ProcInfo(name=command) _pi.pid = p _pi.rss = rss _pi.vsz = vsz _pi.pcpu = pcpu _pi.command = command if _pi is not None: if command in self.processes: self.processes[command].append(_pi) else: self.processes[command] = [_pi] return self.processes def get_proc_info(self, hostname=None, name=None, pid=None, regexp=False): """ Return process information from a process name, or pid, on a given host :param hostname: The hostname on which to query the process info. On Windows,only localhost is queried. :type hostname: str or none :param name: The name of the process to query. :type name: str or None :param pid: The pid of the process to query :type pid: int or None :param regexp: Match processes by regular expression. Defaults to True. Does not apply to matching by PID. :type regexp: bool :returns: A list of ProcInfo objects, one for each matching process. .. note:: If both, name and pid, are specified, name is used. """ self._init_processes() return self._get_proc_info_unix(hostname, name, pid, regexp) def get_proc_state(self, hostname=None, pid=None): """ :returns: PID's process state on host hostname On error the empty string is returned. 
""" if not self.du.is_localhost(hostname): platform = self.du.get_platform(hostname) else: platform = sys.platform try: if platform.startswith('linux'): cmd = ['ps', '-o', 'stat', '-p', str(pid), '--no-heading'] rv = self.du.run_cmd(hostname, cmd, level=logging.DEBUG2) return rv['out'][0][0] elif platform.startswith('sunos'): cmd = ['ps', '-o' 's', '-p', str(pid)] rv = self.du.run_cmd(hostname, cmd, level=logging.DEBUG2) return rv['out'][1][0] elif platform.startswith('aix'): cmd = ['ps', '-o' 'state', '-p', str(pid)] rv = self.du.run_cmd(hostname, cmd, level=logging.DEBUG2) return rv['out'][1][0] except: self.logger.error('Error getting process state for pid ' + pid) return '' def get_proc_children(self, hostname=None, ppid=None): """ :returns: A list of children PIDs associated to ``PPID`` on host hostname. On error, an empty list is returned. """ try: if not isinstance(ppid, str): ppid = str(ppid) if int(ppid) <= 0: raise if not self.du.is_localhost(hostname): platform = self.du.get_platform(hostname) else: platform = sys.platform childlist = [] if platform.startswith('linux'): cmd = ['ps', '-o', 'pid', '--ppid:%s' % ppid, '--no-heading'] rv = self.du.run_cmd(hostname, cmd) children = rv['out'][:-1] elif platform.startswith('sunos') or platform.startswith('aix'): children = [] cmd = ['ps', 'o', 'pid,ppid'] ret = self.du.run_cmd(hostname, cmd) for o in ret['out']: (pid, ppid) = o.split() if ppid.strip() == str(ppid): children.append(pid.strip()) else: children = [] for child in children: child = child.strip() if child != '': childlist.append(child) childlist.extend(self.get_proc_children(hostname, child)) return childlist except: self.logger.error('Error getting children processes of parent ' + ppid) return []
def check_hardware_status_and_core_files(self, test): """ function checks hardware status and core files every 5 minutes """ du = DshUtils() systems = list(self.param_dict['servers']) systems.extend(self.param_dict['moms']) systems.extend(self.param_dict['comms']) systems = list(set(systems)) if hasattr(test, 'test'): _test = test.test elif hasattr(test, 'context'): _test = test.context else: return None for name in ['servers', 'moms', 'comms', 'clients']: mlist = None if (hasattr(_test, name) and (getattr(_test, name, None) is not None)): mlist = getattr(_test, name).values() if mlist: for mc in mlist: platform = mc.platform if ((platform not in ['linux', 'shasta', 'cray']) and (mc.hostname in systems)): systems.remove(mc.hostname) self.hardware_report_timer = Timer( 300, self.check_hardware_status_and_core_files, args=(test, )) self.hardware_report_timer.start() for hostname in systems: hr = SystemInfo() hr.get_system_info(hostname) # monitors disk used_disk_percent = getattr(hr, 'system_disk_used_percent', None) if used_disk_percent is None: _msg = hostname _msg += ": unable to get disk info" self.hardware_report_timer.cancel() raise SkipTest(_msg) elif 70 <= used_disk_percent < 95: _msg = hostname + ": disk usage is at " _msg += str(used_disk_percent) + "%" _msg += ", disk cleanup is recommended." self.logger.warning(_msg) elif used_disk_percent >= 95: _msg = hostname + ":disk usage > 95%, skipping the test(s)" self.hardware_report_timer.cancel() raise SkipTest(_msg) # checks for core files pbs_conf = du.parse_pbs_config(hostname) mom_priv_path = os.path.join(pbs_conf["PBS_HOME"], "mom_priv") if du.isdir(hostname=hostname, path=mom_priv_path): mom_priv_files = du.listdir(hostname=hostname, path=mom_priv_path, sudo=True, fullpath=False) if fnmatch.filter(mom_priv_files, "core*"): _msg = hostname + ": core files found in " _msg += mom_priv_path self.logger.warning(_msg) server_priv_path = os.path.join(pbs_conf["PBS_HOME"], "server_priv") if du.isdir(hostname=hostname, path=server_priv_path): server_priv_files = du.listdir(hostname=hostname, path=server_priv_path, sudo=True, fullpath=False) if fnmatch.filter(server_priv_files, "core*"): _msg = hostname + ": core files found in " _msg += server_priv_path self.logger.warning(_msg) sched_priv_path = os.path.join(pbs_conf["PBS_HOME"], "sched_priv") if du.isdir(hostname=hostname, path=sched_priv_path): sched_priv_files = du.listdir(hostname=hostname, path=sched_priv_path, sudo=True, fullpath=False) if fnmatch.filter(sched_priv_files, "core*"): _msg = hostname + ": core files found in " _msg += sched_priv_path self.logger.warning(_msg) for u in PBS_ALL_USERS: user_home_files = du.listdir(hostname=hostname, path=u.home, sudo=True, fullpath=False, runas=u.name) if user_home_files and fnmatch.filter(user_home_files, "core*"): _msg = hostname + ": user-" + str(u) _msg += ": core files found in " self.logger.warning(_msg + u.home)
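# Standalone sketch of the disk-usage policy applied above (thresholds taken
# from the code: warn from 70% up to 95%, skip the test run at 95% or more).
# 'used_percent' would normally come from SystemInfo.system_disk_used_percent.
def classify_disk_usage(used_percent):
    if used_percent is None:
        return 'skip: unable to get disk info'
    if used_percent >= 95:
        return 'skip: disk usage > 95%'
    if used_percent >= 70:
        return 'warn: disk usage at %s%%, cleanup recommended' % used_percent
    return 'ok'

# classify_disk_usage(72) -> warning, classify_disk_usage(96) -> skip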
def _cleanup(self): self.logger.info('Cleaning up temporary files') du = DshUtils() hosts = self.param_dict['moms'] for server in self.param_dict['servers']: if server not in self.param_dict['moms']: hosts.update(self.param_dict['servers']) for user in PBS_USERS: self.logger.info('Cleaning %s\'s home directory' % (str(user))) runas = PbsUser.get_user(user) for host in hosts: ret = du.run_cmd(host, cmd=['echo', '$HOME'], sudo=True, runas=runas, logerr=False, as_script=True) if ret['rc'] == 0: path = ret['out'][0].strip() else: return None ftd = [] files = du.listdir(host, path=path, runas=user) bn = os.path.basename ftd.extend([f for f in files if bn(f).startswith('PtlPbs')]) ftd.extend([f for f in files if bn(f).startswith('STDIN')]) if len(ftd) > 1000: for i in range(0, len(ftd), 1000): j = i + 1000 du.rm(host, path=ftd[i:j], runas=user, force=True, level=logging.DEBUG) root_dir = os.sep dirlist = set([ os.path.join(root_dir, 'tmp'), os.path.join(root_dir, 'var', 'tmp') ]) # get tmp dir from the environment for envname in 'TMPDIR', 'TEMP', 'TMP': dirname = os.getenv(envname) if dirname: dirlist.add(dirname) p = re.compile(r'^pbs\.\d+') for tmpdir in dirlist: # list the contents of each tmp dir and # get the file list to be deleted self.logger.info('Cleaning up ' + tmpdir + ' dir') ftd = [] files = du.listdir(path=tmpdir) bn = os.path.basename ftd.extend([f for f in files if bn(f).startswith('PtlPbs')]) ftd.extend([f for f in files if bn(f).startswith('STDIN')]) ftd.extend([f for f in files if bn(f).startswith('pbsscrpt')]) ftd.extend([f for f in files if bn(f).startswith('pbs.conf.')]) ftd.extend([f for f in files if p.match(bn(f))]) for f in ftd: du.rm(path=f, sudo=True, recursive=True, force=True, level=logging.DEBUG) for f in du.tmpfilelist: du.rm(path=f, sudo=True, force=True, level=logging.DEBUG) del du.tmpfilelist[:] tmpdir = tempfile.gettempdir() os.chdir(tmpdir) tmppath = os.path.join(tmpdir, 'dejagnutemp%s' % os.getpid()) if du.isdir(path=tmppath): du.rm(path=tmppath, recursive=True, sudo=True, force=True, level=logging.DEBUG)
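# Sketch of the batching used above for per-user home directories: paths are
# removed in chunks of 1000 so each DshUtils.rm() call stays manageable.
# 'remove' is a placeholder for the actual remover; note that, as written,
# the code above only enters its batch loop when more than 1000 matching
# files were found.
def remove_in_batches(paths, remove, batch_size=1000):
    for i in range(0, len(paths), batch_size):
        remove(paths[i:i + batch_size])

# Example with a dummy remover:
# remove_in_batches(['f%d' % n for n in range(2500)],
#                   lambda chunk: print(len(chunk)))   # 1000, 1000, 500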
class PTLJsonData(object): """ The intent of the class is to generate json format of PTL test data """ def __init__(self, command): self.__du = DshUtils() self.__cmd = command def get_json(self, data, prev_data=None): """ Method to generate test data in accordance to json schema :param data: dictionary of a test case details :type data: dict :param prev_data: dictionary of test run details that ran before the current test :type prev_data: dict :returns a formatted dictionary of the data """ data_json = None if not prev_data: data_json = { 'command': self.__cmd, 'user': self.__du.get_current_user(), 'product_version': data['pbs_version'], 'run_id': data['start_time'].strftime('%s'), 'test_conf': {}, 'machine_info': data['machinfo'], 'testsuites': {}, 'additional_data': {}, 'test_summary': { 'result_summary': { 'run': 0, 'succeeded': 0, 'failed': 0, 'errors': 0, 'skipped': 0, 'timedout': 0 }, 'test_start_time': str(data['start_time']), 'tests_with_failures': [], 'test_suites_with_failures': [] } } if data['testparam']: for param in data['testparam'].split(','): if '=' in param: par = param.split('=', 1) data_json['test_conf'][par[0]] = par[1] else: data_json['test_conf'][param] = True else: data_json = prev_data tsname = data['suite'] tcname = data['testcase'] if tsname not in data_json['testsuites']: data_json['testsuites'][tsname] = { 'module': data['module'], 'file': data['file'], 'testcases': {} } tsdoc = [] if data['suitedoc']: tsdoc = (re.sub(r"[\t\n ]+", " ", data['suitedoc'])).strip() data_json['testsuites'][tsname]['docstring'] = tsdoc tcshort = {} tcdoc = [] if data['testdoc']: tcdoc = (re.sub(r"[\t\n ]+", " ", data['testdoc'])).strip() tcshort['docstring'] = tcdoc if data['tags']: tcshort['tags'] = data['tags'] tcshort['results'] = { 'status': data['status'], 'status_data': str(data['status_data']), 'duration': str(data['duration']), 'start_time': str(data['start_time']), 'end_time': str(data['end_time']), 'measurements': [] } tcshort['requirements'] = data['requirements'] if 'measurements' in data: tcshort['results']['measurements'] = data['measurements'] data_json['testsuites'][tsname]['testcases'][tcname] = tcshort if 'additional_data' in data: data_json['additional_data'] = data['additional_data'] data_json['test_summary']['test_end_time'] = str(data['end_time']) data_json['test_summary']['result_summary']['run'] += 1 d_ts = data_json['test_summary'] if data['status'] == 'PASS': d_ts['result_summary']['succeeded'] += 1 elif data['status'] == 'SKIP': d_ts['result_summary']['skipped'] += 1 elif data['status'] == 'TIMEDOUT': d_ts['result_summary']['timedout'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) elif data['status'] == 'ERROR': d_ts['result_summary']['errors'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) elif data['status'] == 'FAIL': d_ts['result_summary']['failed'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) return data_json
class PTLJsonData(object): """ The intent of the class is to generate json format of PTL test data """ def __init__(self, command): self.__du = DshUtils() self.__cmd = command def get_json(self, data, prev_data=None): """ Method to generate test data in accordance to json schema :param data: dictionary of a test case details :type data: dict :param prev_data: dictionary of test run details that ran before the current test :type prev_data: dict :returns a formatted dictionary of the data """ data_json = None if not prev_data: data_json = { 'command': self.__cmd, 'user': self.__du.get_current_user(), 'product_version': data['pbs_version'], 'run_id': data['start_time'].strftime('%s'), 'test_conf': {}, 'machine_info': data['machinfo'], 'testsuites': {}, 'additional_data': {}, 'test_summary': { 'result_summary': { 'run': 0, 'succeeded': 0, 'failed': 0, 'errors': 0, 'skipped': 0, 'timedout': 0 }, 'test_start_time': str(data['start_time']), 'tests_with_failures': [], 'test_suites_with_failures': [] } } if data['testparam']: for param in data['testparam'].split(','): par = param.split('=', 1) data_json['test_conf'][par[0]] = par[1] else: data_json = prev_data tsname = data['suite'] tcname = data['testcase'] if tsname not in data_json['testsuites']: data_json['testsuites'][tsname] = { 'module': data['module'], 'file': data['file'], 'testcases': {} } tsdoc = [] if data['suitedoc']: tsdoc = (re.sub(r"[\t\n ]+", " ", data['suitedoc'])).strip() data_json['testsuites'][tsname]['docstring'] = tsdoc tcshort = {} tcdoc = [] if data['testdoc']: tcdoc = (re.sub(r"[\t\n ]+", " ", data['testdoc'])).strip() tcshort['docstring'] = tcdoc if data['tags']: tcshort['tags'] = data['tags'] tcshort['results'] = { 'status': data['status'], 'status_data': str(data['status_data']), 'duration': str(data['duration']), 'start_time': str(data['start_time']), 'end_time': str(data['end_time']), 'measurements': [] } tcshort['requirements'] = {} if 'measurements' in data: tcshort['results']['measurements'] = data['measurements'] data_json['testsuites'][tsname]['testcases'][tcname] = tcshort if 'additional_data' in data: data_json['additional_data'] = data['additional_data'] data_json['test_summary']['test_end_time'] = str(data['end_time']) data_json['test_summary']['result_summary']['run'] += 1 d_ts = data_json['test_summary'] if data['status'] == 'PASS': d_ts['result_summary']['succeeded'] += 1 elif data['status'] == 'SKIP': d_ts['result_summary']['skipped'] += 1 elif data['status'] == 'TIMEDOUT': d_ts['result_summary']['timedout'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) elif data['status'] == 'ERROR': d_ts['result_summary']['errors'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) elif data['status'] == 'FAIL': d_ts['result_summary']['failed'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) return data_json
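# Abridged example of the dictionary shape produced by PTLJsonData.get_json()
# above; all values are invented and some result fields (status_data, start
# and end times of the test case) are omitted for brevity.
_example_ptl_json = {
    'command': 'pbs_benchpress',
    'user': 'pbsuser',
    'product_version': '2022.1.0',
    'run_id': '1650000000',
    'test_conf': {'moms': 'hostA:hostB'},
    'machine_info': {},
    'testsuites': {
        'SmokeTest': {
            'module': 'tests.functional',
            'file': 'pbs_smoketest.py',
            'docstring': 'Basic smoke tests',
            'testcases': {
                'test_submit_job': {
                    'docstring': 'Submit a sleep job',
                    'tags': ['smoke'],
                    'results': {'status': 'PASS', 'duration': '0:00:05',
                                'measurements': []},
                    'requirements': {},
                },
            },
        },
    },
    'additional_data': {},
    'test_summary': {
        'result_summary': {'run': 1, 'succeeded': 1, 'failed': 0,
                           'errors': 0, 'skipped': 0, 'timedout': 0},
        'test_start_time': '2022-04-15 10:00:00.000000',
        'test_end_time': '2022-04-15 10:00:05.000000',
        'tests_with_failures': [],
        'test_suites_with_failures': [],
    },
}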
class CrayUtils(object): """ Cray specific utility class """ node_status = [] node_summary = {} du = None def __init__(self): self.du = DshUtils() (self.node_status, self.node_summary) = self.parse_apstat_rn() def parse_apstat_rn(self): """ Run apstat command on cray/craysim and parse its output :param options: options to pass to apstat command :type options: str :returns: tuple of (node status, node summary) """ status = [] summary = {} count = 0 options = '-rn' hostname = socket.gethostname() platform = self.du.get_platform(hostname) apstat_env = os.environ apstat_cmd = "apstat" if 'cray' not in platform: return (status, summary) if 'craysim' in platform: lib_path = '$LD_LIBRARY_PATH:/opt/alps/tester/usr/lib/' apstat_env['LD_LIBRARY_PATH'] = lib_path apstat_env['ALPS_CONFIG_FILE'] = '/opt/alps/tester/alps.conf' apstat_env['apsched_sharedDir'] = '/opt/alps/tester/' apstat_cmd = "/opt/alps/tester/usr/bin/apstat -d ." cmd_run = self.du.run_cmd(hostname, [apstat_cmd, options], as_script=True, wait_on_script=True, env=apstat_env) cmd_result = cmd_run['out'] keys = cmd_result[0].split() # Add a key 'Mode' because 'State' is composed of two list items, e.g: # State = 'UP B', where Mode = 'B' k2 = ['Mode'] keys = keys[0:3] + k2 + keys[3:] cmd_iter = iter(cmd_result) for line in cmd_iter: if count == 0: count = 1 continue if "Compute node summary" in line: summary_line = next(cmd_iter) summary_keys = summary_line.split() summary_data = next(cmd_iter).split() sum_index = 0 for a in summary_keys: summary[a] = summary_data[sum_index] sum_index += 1 break obj = {} line = line.split() for i, value in enumerate(line): obj[keys[i]] = value if keys[i] == 'State': obj[keys[i]] = value + " " + line[i + 1] # If there is no Apids in the apstat then use 'None' as the value if "Apids" in obj: pass else: obj["Apids"] = None status.append(obj) return (status, summary) def count_node_summ(self, cnsumm='up'): """ Return the value of any one of the following parameters as shown in the 'Compute Node Summary' section of 'apstat -rn' output: arch, config, up, resv, use, avail, down :param cnsumm: parameter which is being queried, defaults to 'up' :type cnsumm: str :returns: value of parameter being queried """ return int(self.node_summary[cnsumm]) def count_node_state(self, state='UP B'): """ Return how many nodes have a certain 'State' value. :param state: parameter which is being queried, defaults to 'UP B' :type state: str :returns: count of how many nodes have the state """ count = 0 status = self.node_status for stat in status: if stat['State'] == state: count += 1 return count
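# Small standalone sketch of the header handling in parse_apstat_rn() above:
# the 'State' column of 'apstat -rn' holds two tokens (e.g. 'UP B'), so a
# synthetic 'Mode' key is spliced in after the first three column names and
# the two tokens are re-joined when a row is parsed.  The sample header and
# row below are made up.
def parse_apstat_row(header_line, row_line):
    keys = header_line.split()
    keys = keys[0:3] + ['Mode'] + keys[3:]
    row = row_line.split()
    obj = {}
    for i, value in enumerate(row):
        obj[keys[i]] = value
        if keys[i] == 'State':
            obj[keys[i]] = value + ' ' + row[i + 1]
    return obj

# parse_apstat_row('NID Arch State CU Rv Pl PgSz Avl Conf Placed PEs Apids',
#                  '20 XT UP B 8 - - 4K 16777216 0 0 0')
# -> {'NID': '20', 'Arch': 'XT', 'State': 'UP B', 'Mode': 'B', 'CU': '8', ...}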
class PTLTestData(Plugin): """ Save post analysis data on test cases failure or error """ name = 'PTLTestData' score = sys.maxsize - 6 logger = logging.getLogger(__name__) def __init__(self): Plugin.__init__(self) self.post_data_dir = None self.max_postdata_threshold = None self.__save_data_count = 0 self.__priv_sn = '' self.du = DshUtils() def options(self, parser, env): """ Register command line options """ pass def set_data(self, post_data_dir, max_postdata_threshold): self.post_data_dir = post_data_dir self.max_postdata_threshold = max_postdata_threshold def configure(self, options, config): """ Configure the plugin and system, based on selected options """ self.config = config if self.post_data_dir is not None: self.enabled = True else: self.enabled = False def __save_home(self, test, status, err=None): if hasattr(test, 'test'): _test = test.test sn = _test.__class__.__name__ elif hasattr(test, 'context'): _test = test.context sn = _test.__name__ else: # test does not have any PBS Objects, so just return return if self.__priv_sn != sn: self.__save_data_count = 0 self.__priv_sn = sn # Saving home might take time so disable timeout # handler set by runner tn = getattr(_test, '_testMethodName', 'unknown') testlogs = getattr(test, 'captured_logs', '') datadir = os.path.join(self.post_data_dir, sn, tn) if os.path.exists(datadir): _msg = 'Old post analysis data exists at %s' % datadir _msg += ', skipping saving data for this test case' self.logger.warn(_msg) _msg = 'Please remove old directory or' _msg += ' provide different directory' self.logger.warn(_msg) return if getattr(test, 'old_sigalrm_handler', None) is not None: _h = getattr(test, 'old_sigalrm_handler') signal.signal(signal.SIGALRM, _h) signal.alarm(0) self.logger.log(logging.DEBUG2, 'Saving post analysis data...') current_host = socket.gethostname().split('.')[0] self.du.mkdir(current_host, path=datadir, mode=0o755, parents=True, logerr=False, level=logging.DEBUG2) if err is not None: if isclass(err[0]) and issubclass(err[0], SkipTest): status = 'SKIP' status_data = 'Reason = %s' % (err[1]) else: if isclass(err[0]) and issubclass(err[0], TimeOut): status = 'TIMEDOUT' status_data = getattr(test, 'err_in_string', '') else: status_data = '' logfile = os.path.join(datadir, 'logfile_' + status) f = open(logfile, 'w+') f.write(testlogs + '\n') f.write(status_data + '\n') f.write('test duration: %s\n' % str(getattr(test, 'duration', '0'))) if status in ('PASS', 'SKIP'): # Test case passed or skipped, no need to save post analysis data f.close() return if ((self.max_postdata_threshold != 0) and (self.__save_data_count >= self.max_postdata_threshold)): _msg = 'Total number of saved post analysis data for this' _msg += ' testsuite is exceeded max postdata threshold' _msg += ' (%d)' % self.max_postdata_threshold f.write(_msg + '\n') self.logger.error(_msg) f.close() return servers = getattr(_test, 'servers', None) if servers is not None: server_host = servers.values()[0].shortname else: _msg = 'Could not find Server Object in given test object' _msg += ', skipping saving post analysis data' f.write(_msg + '\n') self.logger.warning(_msg) f.close() return moms = getattr(_test, 'moms', None) comms = getattr(_test, 'comms', None) client = getattr(_test.servers.values()[0], 'client', None) server = servers.values()[0] add_hosts = [] if len(servers) > 1: for param in servers.values()[1:]: add_hosts.append(param.shortname) if moms is not None: for param in moms.values(): add_hosts.append(param.shortname) if comms is not None: for param in 
comms.values(): add_hosts.append(param.shortname) if client is not None: add_hosts.append(client.split('.')[0]) add_hosts = list(set(add_hosts) - set([server_host])) pbs_snapshot_path = os.path.join( server.pbs_conf["PBS_EXEC"], "sbin", "pbs_snapshot") cur_user = self.du.get_current_user() cur_user_dir = pwd.getpwnam(cur_user).pw_dir cmd = [ pbs_snapshot_path, '-H', server_host, '--daemon-logs', '2', '--accounting-logs', '2', '--with-sudo' ] if len(add_hosts) > 0: cmd += ['--additional-hosts=' + ','.join(add_hosts)] cmd += ['-o', cur_user_dir] ret = self.du.run_cmd(current_host, cmd, level=logging.DEBUG2, logerr=False) if ret['rc'] != 0: _msg = 'Failed to get analysis information ' _msg += 'on %s:' % server_host _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n' f.write(_msg + '\n') self.logger.error(_msg) f.close() return else: if len(ret['out']) == 0: self.logger.error('Snapshot command failed') f.close() return snap_out = ret['out'][0] snap_out_dest = (snap_out.split(":")[1]).strip() dest = os.path.join(datadir, 'PBS_' + server_host + '.tar.gz') ret = self.du.run_copy(current_host, src=snap_out_dest, dest=dest, sudo=True, level=logging.DEBUG2) self.du.rm(current_host, path=snap_out_dest, recursive=True, force=True, level=logging.DEBUG2) f.close() self.__save_data_count += 1 _msg = 'Saved post analysis data' self.logger.info(_msg) def addError(self, test, err): self.__save_home(test, 'ERROR', err) def addFailure(self, test, err): self.__save_home(test, 'FAIL', err) def addSuccess(self, test): self.__save_home(test, 'PASS')
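# Sketch (not part of PTL) of the pbs_snapshot invocation assembled in
# __save_home() above; the binary path, host names and output directory are
# placeholders, and the numeric values are copied from the code above.
def build_snapshot_cmd(pbs_snapshot_path, server_host, out_dir, add_hosts=()):
    cmd = [pbs_snapshot_path,
           '-H', server_host,
           '--daemon-logs', '2',
           '--accounting-logs', '2',
           '--with-sudo']
    if add_hosts:
        cmd += ['--additional-hosts=' + ','.join(add_hosts)]
    cmd += ['-o', out_dir]
    return cmd

# build_snapshot_cmd('/opt/pbs/sbin/pbs_snapshot', 'serverA',
#                    '/home/pbsuser', add_hosts=['momB', 'clientC'])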
class CrayUtils(object): """ Cray specific utility class """ node_status = [] node_summary = {} cmd_output = [] du = None def __init__(self): self.du = DshUtils() (self.node_status, self.node_summary) = self.parse_apstat_rn() def call_apstat(self, options): """ Build the apstat command and run it. Return the output of the command. :param options: options to pass to apstat command :type options: str :returns: the command output """ hostname = socket.gethostname() platform = self.du.get_platform(hostname) apstat_env = os.environ apstat_cmd = "apstat" if 'cray' not in platform: return None if 'craysim' in platform: lib_path = '$LD_LIBRARY_PATH:/opt/alps/tester/usr/lib/' apstat_env['LD_LIBRARY_PATH'] = lib_path apstat_env['ALPS_CONFIG_FILE'] = '/opt/alps/tester/alps.conf' apstat_env['apsched_sharedDir'] = '/opt/alps/tester/' apstat_cmd = "/opt/alps/tester/usr/bin/apstat -d ." cmd_run = self.du.run_cmd(hostname, [apstat_cmd, options], as_script=True, wait_on_script=True, env=apstat_env) return cmd_run def parse_apstat_rn(self): """ Parse the apstat command output for node status and summary :type options: str :returns: tuple of (node status, node summary) """ status = [] summary = {} count = 0 options = '-rn' cmd_run = self.call_apstat(options) if cmd_run is None: return (status, summary) cmd_result = cmd_run['out'] keys = cmd_result[0].split() # Add a key 'Mode' because 'State' is composed of two list items, e.g: # State = 'UP B', where Mode = 'B' k2 = ['Mode'] keys = keys[0:3] + k2 + keys[3:] cmd_iter = iter(cmd_result) for line in cmd_iter: if count == 0: count = 1 continue if "Compute node summary" in line: summary_line = next(cmd_iter) summary_keys = summary_line.split() summary_data = next(cmd_iter).split() sum_index = 0 for a in summary_keys: summary[a] = summary_data[sum_index] sum_index += 1 break obj = {} line = line.split() for i, value in enumerate(line): obj[keys[i]] = value if keys[i] == 'State': obj[keys[i]] = value + " " + line[i + 1] # If there is no Apids in the apstat then use 'None' as the value if "Apids" in obj: pass else: obj["Apids"] = None status.append(obj) return (status, summary) def count_node_summ(self, cnsumm='up'): """ Return the value of any one of the following parameters as shown in the 'Compute Node Summary' section of 'apstat -rn' output: arch, config, up, resv, use, avail, down :param cnsumm: parameter which is being queried, defaults to 'up' :type cnsumm: str :returns: value of parameter being queried """ return int(self.node_summary[cnsumm]) def count_node_state(self, state='UP B'): """ Return how many nodes have a certain 'State' value. :param state: parameter which is being queried, defaults to 'UP B' :type state: str :returns: count of how many nodes have the state """ count = 0 status = self.node_status for stat in status: if stat['State'] == state: count += 1 return count def get_numthreads(self, nid): """ Returns the number of hyperthread for the given node """ options = '-N %d -n -f "nid,c/cu"' % int(nid) cmd_run = self.call_apstat(options) if cmd_run is None: return None cmd_result = cmd_run['out'] cmd_iter = iter(cmd_result) numthreads = 0 for line in cmd_iter: if "Compute node summary" in line: break elif "NID" in line: continue else: key = line.split() numthreads = int(key[1]) return numthreads def num_compute_vnodes(self, server): """ Count the Cray compute nodes and return the value. """ vnl = server.filter(MGR_OBJ_NODE, {'resources_available.vntype': 'cray_compute'}) return len(vnl["resources_available.vntype=cray_compute"])
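# Usage sketch for CrayUtils above; it is only meaningful on a cray/craysim
# platform where apstat is available, so it is defined but not executed here.
# The nid value is an example.
def _crayutils_demo():
    cu = CrayUtils()
    return {'nodes_up': cu.count_node_summ('up'),
            'batch_nodes': cu.count_node_state('UP B'),
            'threads_nid_12': cu.get_numthreads(12)}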
class PTLJsonData(object): """ The intent of the class is to generate json format of PTL test data """ cur_repeat_count = 1 def __init__(self, command): self.__du = DshUtils() self.__cmd = command def get_json(self, data, prev_data=None): """ Method to generate test data in accordance to json schema :param data: dictionary of a test case details :type data: dict :param prev_data: dictionary of test run details that ran before the current test :type prev_data: dict :returns a formatted dictionary of the data """ FMT = '%H:%M:%S.%f' run_count = str(PtlTextTestRunner.cur_repeat_count) data_json = None if not prev_data: PTLJsonData.cur_repeat_count = 1 tests_start = str(data['start_time']).split()[1] data_json = { 'command': self.__cmd, 'user': self.__du.get_current_user(), 'product_version': data['pbs_version'], 'run_id': data['start_time'].strftime('%s'), 'test_conf': {}, 'machine_info': data['machinfo'], 'testsuites': {}, 'additional_data': {}, 'test_summary': {}, 'avg_measurements': {}, 'result': { 'tests_with_failures': [], 'test_suites_with_failures': [], 'start': str(data['start_time']) } } test_summary = { 'result_summary': { 'run': 0, 'succeeded': 0, 'failed': 0, 'errors': 0, 'skipped': 0, 'timedout': 0 }, 'test_start_time': str(data['start_time']), 'tests_with_failures': [], 'test_suites_with_failures': [] } data_json['test_summary'][run_count] = test_summary if data['testparam']: for param in data['testparam'].split(','): if '=' in param: par = param.split('=', 1) data_json['test_conf'][par[0]] = par[1] else: data_json['test_conf'][param] = True else: data_json = prev_data if PTLJsonData.cur_repeat_count != PtlTextTestRunner.cur_repeat_count: test_summary = { 'result_summary': { 'run': 0, 'succeeded': 0, 'failed': 0, 'errors': 0, 'skipped': 0, 'timedout': 0 }, 'test_start_time': str(data['start_time']), 'tests_with_failures': [], 'test_suites_with_failures': [] } data_json['test_summary'][run_count] = test_summary PTLJsonData.cur_repeat_count = PtlTextTestRunner.cur_repeat_count tsname = data['suite'] tcname = data['testcase'] jdata = { 'status': data['status'], 'status_data': str(data['status_data']), 'duration': str(data['duration']), 'start_time': str(data['start_time']), 'end_time': str(data['end_time']), 'measurements': [] } if 'measurements' in data: jdata['measurements'] = data['measurements'] if PtlTextTestRunner.cur_repeat_count == 1: if tsname not in data_json['testsuites']: data_json['testsuites'][tsname] = { 'module': data['module'], 'file': data['file'], 'testcases': {} } tsdoc = [] if data['suitedoc']: tsdoc = (re.sub(r"[\t\n ]+", " ", data['suitedoc'])).strip() data_json['testsuites'][tsname]['docstring'] = tsdoc tcdoc = [] if data['testdoc']: tcdoc = (re.sub(r"[\t\n ]+", " ", data['testdoc'])).strip() data_json['testsuites'][tsname]['testcases'][tcname] = { 'docstring': tcdoc, 'requirements': data['requirements'], 'results': {} } if data['testdoc']: jdata_tests = data_json['testsuites'][tsname]['testcases'] jdata_tests[tcname]['tags'] = data['tags'] jdata_tests = data_json['testsuites'][tsname]['testcases'] jdata_tests[tcname]['results'][run_count] = jdata if 'additional_data' in data: data_json['additional_data'] = data['additional_data'] data_json['test_summary'][run_count]['test_end_time'] = str( data['end_time']) run_summary = data_json['test_summary'][run_count] start = run_summary['test_start_time'].split()[1] end = str(data['end_time']).split()[1] dur = str( datetime.datetime.strptime(end, FMT) - datetime.datetime.strptime(start, FMT)) 
data_json['test_summary'][run_count]['tests_duration'] = dur data_json['test_summary'][run_count]['result_summary']['run'] += 1 d_ts = data_json['test_summary'][run_count] if data['status'] == 'PASS': d_ts['result_summary']['succeeded'] += 1 elif data['status'] == 'SKIP': d_ts['result_summary']['skipped'] += 1 elif data['status'] == 'TIMEDOUT': d_ts['result_summary']['timedout'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) elif data['status'] == 'ERROR': d_ts['result_summary']['errors'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) elif data['status'] == 'FAIL': d_ts['result_summary']['failed'] += 1 d_ts['tests_with_failures'].append(data['testcase']) if data['suite'] not in d_ts['test_suites_with_failures']: d_ts['test_suites_with_failures'].append(data['suite']) m_avg = {'testsuites': {}} for tsname in data_json['testsuites']: m_avg['testsuites'][tsname] = {'testcases': {}} for tcname in data_json['testsuites'][tsname]['testcases']: test_status = "PASS" m_avg['testsuites'][tsname]['testcases'][tcname] = [] t_sum = [] count = 0 j_data = data_json['testsuites'][tsname]['testcases'][tcname] measurements_data = [] for key in j_data['results'].keys(): count += 1 r_count = str(count) m_case = data_json['testsuites'][tsname]['testcases'] m = m_case[tcname]['results'][r_count]['measurements'] if j_data['results'][r_count]['status'] is not "PASS": test_status = "FAIL" m_sum = [] for i in range(len(m)): sum_mean = 0 sum_std = [] sum_min = [] sum_max = [] record = [] if "test_measure" in m[i].keys(): if len(t_sum) > i: sum_mean = m[i]["test_data"]['mean'] + \ t_sum[i][0] sum_std.extend(t_sum[i][1]) sum_min.extend(t_sum[i][2]) sum_max.extend(t_sum[i][3]) else: measurements_data.append(m[i]) sum_mean = m[i]["test_data"]['mean'] sum_std.append(m[i]["test_data"]['mean']) sum_min.append(m[i]["test_data"]['minimum']) sum_max.append(m[i]["test_data"]['maximum']) record = [sum_mean, sum_std, sum_min, sum_max] else: if len(measurements_data) <= i: measurements_data.append(m[i]) record = [sum_mean, sum_std, sum_min, sum_max] m_sum.append(record) if len(t_sum) > len(m_sum): for v in range(len(m_sum)): t_sum[v] = m_sum[v] else: t_sum = m_sum m_list = [] if test_status == "PASS": for i in range(len(measurements_data)): m_data = {} if "test_measure" in measurements_data[i].keys(): measure = measurements_data[i]['test_measure'] m_data['test_measure'] = measure m_data['unit'] = measurements_data[i]['unit'] m_data['test_data'] = {} div = count m_data['test_data']['mean'] = t_sum[i][0] / div if len(t_sum[i][1]) < 2: m_data['test_data']['std_dev'] = 0 else: std_dev = statistics.stdev(t_sum[i][1]) m_data['test_data']['std_dev'] = std_dev minimum = min(t_sum[i][2]) maximum = max(t_sum[i][3]) m_data['test_data']['minimum'] = minimum m_data['test_data']['maximum'] = maximum m_list.append(m_data) m_avg['testsuites'][tsname]['testcases'][tcname] = m_list data_json["avg_measurements"] = m_avg data_json['result']['end'] = str(data['end_time']) start = data_json['result']['start'].split()[1] end = data_json['result']['end'].split()[1] dur = str( datetime.datetime.strptime(end, FMT) - datetime.datetime.strptime(start, FMT)) fail_tests = [] fail_ts = [] for count in range(PtlTextTestRunner.cur_repeat_count): r_count = str(count + 1) fail_tests.extend( 
data_json['test_summary'][r_count]['tests_with_failures']) fail_ts.extend(data_json['test_summary'][r_count] ['test_suites_with_failures']) data_json['result']['duration'] = dur data_json['result']['tests_with_failures'] = list(set(fail_tests)) data_json['result']['test_suites_with_failures'] = list(set(fail_ts)) return data_json
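# Standalone sketch of the aggregation performed for 'avg_measurements' above:
# across repeated runs of one test case, the per-run means are averaged, their
# standard deviation is taken, and the overall minimum/maximum are kept; the
# averages are only produced when every repeat of the test passed.  The input
# mirrors one 'measurements' entry ('test_measure', 'unit', 'test_data');
# the numbers are invented.
import statistics

def aggregate_measure(runs):
    means = [r['test_data']['mean'] for r in runs]
    return {
        'test_measure': runs[0]['test_measure'],
        'unit': runs[0]['unit'],
        'test_data': {
            'mean': sum(means) / len(means),
            'std_dev': statistics.stdev(means) if len(means) > 1 else 0,
            'minimum': min(r['test_data']['minimum'] for r in runs),
            'maximum': max(r['test_data']['maximum'] for r in runs),
        },
    }

# aggregate_measure([
#     {'test_measure': 'job_throughput', 'unit': 'jobs/s',
#      'test_data': {'mean': 95.0, 'minimum': 90.0, 'maximum': 101.0}},
#     {'test_measure': 'job_throughput', 'unit': 'jobs/s',
#      'test_data': {'mean': 105.0, 'minimum': 98.0, 'maximum': 110.0}},
# ])
# -> mean 100.0, std_dev ~7.07, minimum 90.0, maximum 110.0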
class PBSAnonymizer(object): """ Holds and controls anonymizing operations of PBS data The anonymizer operates on attributes or resources. Resources operate on the resource name itself rather than the entire name, for example, to obfuscate the values associated to a custom resource "foo" that could be set as resources_available. foo resources_default.foo or Resource_List.foo, all that needs to be passed in to the function is "foo" in the list to obfuscate. :param attr_key: Attributes for which the attribute names themselves should be obfuscated :type attr_key: list or None :param attr_val: Attributes for which the values should be obfuscated :type attr_val: list or None :param resc_key: Resources for which the resource names themselves should be obfuscated :type resc_key: list or None :param resc_val: Resources for which the values should be obfuscated :type resc_val: list or None """ logger = logging.getLogger(__name__) utils = BatchUtils() du = DshUtils() def __init__(self, attr_delete=None, resc_delete=None, attr_key=None, attr_val=None, resc_key=None, resc_val=None): # special cases self._entity = False self.job_sort_formula = None self.schedselect = None self.select = None self.set_attr_delete(attr_delete) self.set_resc_delete(resc_delete) self.set_attr_key(attr_key) self.set_attr_val(attr_val) self.set_resc_key(resc_key) self.set_resc_val(resc_val) self.anonymize = self.anonymize_batch_status # global anonymized mapping data self.gmap_attr_val = {} self.gmap_resc_val = {} self.gmap_attr_key = {} self.gmap_resc_key = {} self.num_bad_acct_records = 0 def __get_anon_key(self, key, attr_map): """ Get an anonymized string for the 'key' belonging to attr_map :param key: the key to anonymize :type key: String :param attr_map: the attr_map to which the key belongs :type attr_map: dict :returns: an anonymized string for the key """ key = self.__refactor_key(key) if key in attr_map.keys(): anon_key = attr_map[key] else: anon_key = self.utils.random_str(len(key)) attr_map[key] = anon_key return anon_key @staticmethod def __refactor_key(key): """ There are some attributes which are aliases of each other and others which are lists like user/group lists, lists of hosts etc. Set a common key for them. """ key_lower = key.lower() if "user" in key_lower or key == "requestor": key = ANON_USER_K elif "group" in key_lower: key = ANON_GROUP_K elif "host" in key_lower: key = ANON_HOST_K elif key == "Name" or key == "Jobname": key = ANON_JOBNAME_K elif key == "account": key = ANON_ACCTNAME_K return key def __get_anon_value(self, key, value, kv_map): """ Get an anonymied string for the 'value' belonging to the kv_map provided. 
The kv_map will be in the following format: key:{val1:anon_val1, val2:anon_val2, ...} :param key: the key for this value :type key: String :param value: the value to anonymize :type value: String :param kv_map: the kv_map to which the key belongs :type kv_map: dict :returns: an anonymized string for the value """ if key == "project" and value == "_pbs_project_default": return "_pbs_project_default" # Deal with attributes which have a list of values if key in (ATTR_u, ATTR_managers, ATTR_M, ATTR_g, ATTR_aclResvhost, ATTR_aclhost, ATTR_auth_g, ATTR_auth_u): value_temp = "".join(value.split()) value_list = value_temp.split(",") elif key == ATTR_exechost: value_list = [] value_list_temp = value.split("+") for item in value_list_temp: value_list.append(item.split("/")[0]) else: value_list = [value] key = self.__refactor_key(key) # Go through the list of values and anonymize each in the value string for val in value_list: if "@" in val: # value if of type "user@host" # anonymize the user and host parts separately if ANON_HOST_K in self.attr_val: try: user, host = val.split("@") host = self.__get_anon_value(ANON_HOST_K, host, self.gmap_attr_val) user = self.__get_anon_value(ANON_USER_K, user, self.gmap_attr_val) anon_val = user + "@" + host value = value.replace(val, anon_val) continue except Exception: pass if key in kv_map: value_map = kv_map[key] anon_val = self.__get_anon_key(val, value_map) else: anon_val = self.utils.random_str(len(val)) kv_map[key] = {val: anon_val} value = value.replace(val, anon_val) return value def _initialize_key_map(self, keys): k = {} if keys is not None: if isinstance(keys, dict): return keys elif isinstance(keys, list): for i in keys: k[i] = None elif isinstance(keys, str): for i in keys.split(","): k[i] = None else: self.logger.error("unhandled map type") k = {None: None} return k def _initialize_value_map(self, keys): k = {} if keys is not None: if isinstance(keys, dict): return keys elif isinstance(keys, list): for i in keys: k[i] = {} elif isinstance(keys, str): for i in keys.split(","): k[i] = {} else: self.logger.error("unhandled map type") k = {None: None} return k def set_attr_delete(self, ad): """ Name of attributes to delete :param ad: Attributes to delete :type ad: str or list or dictionary """ self.attr_delete = self._initialize_value_map(ad) def set_resc_delete(self, rd): """ Name of resources to delete :param rd: Resources to delete :type rd: str or list or dictionary """ self.resc_delete = self._initialize_value_map(rd) def set_attr_key(self, ak): """ Name of attributes to obfuscate. :param ak: Attribute keys :type ak: str or list or dictionary """ self.attr_key = self._initialize_key_map(ak) def set_attr_val(self, av): """ Name of attributes for which to obfuscate the value :param av: Attributes value to obfuscate :type av: str or list or dictionary """ self.attr_val = self._initialize_value_map(av) if ("euser" or "egroup" or "project") in self.attr_val: self._entity = True def set_resc_key(self, rk): """ Name of resources to obfuscate :param rk: Resource key :type rk: str or list or dictionary """ self.resc_key = self._initialize_key_map(rk) def set_resc_val(self, rv): """ Name of resources for which to obfuscate the value :param rv: Resource value to obfuscate :type rv: str or list or dictionary """ self.resc_val = self._initialize_value_map(rv) def set_anon_map_file(self, name): """ Name of file in which to store anonymized map data. This file is meant to remain private to a site as it contains the sensitive anonymized data. 
:param name: Name of file to which anonymized data to store. :type name: str """ self.anon_map_file = name def anonymize_resource_group(self, filename): """ Anonymize the user and group fields of a resource group filename :param filename: Resource group filename :type filename: str """ anon_rg = [] try: f = open(filename) lines = f.readlines() f.close() except IOError: self.logger.error("Error processing " + filename) return None for data in lines: data = data.strip() if data: if data[0] == "#": continue _d = data.split() ug = _d[0] if ":" in ug: (euser, egroup) = ug.split(":") else: euser = ug egroup = None if "euser" not in self.attr_val: anon_euser = euser else: anon_euser = None if ANON_USER_K in self.gmap_attr_val: if euser in self.gmap_attr_val[ANON_USER_K]: anon_euser = self.gmap_attr_val[ANON_USER_K][euser] else: self.gmap_attr_val[ANON_USER_K] = {} if euser is not None and anon_euser is None: anon_euser = self.utils.random_str(len(euser)) self.gmap_attr_val[ANON_USER_K][euser] = anon_euser if "egroup" not in self.attr_val: anon_egroup = egroup else: anon_egroup = None if egroup is not None: if ANON_GROUP_K in self.gmap_attr_val: if egroup in self.gmap_attr_val[ANON_GROUP_K]: anon_egroup = (self.gmap_attr_val[ANON_GROUP_K] [egroup]) else: self.gmap_attr_val[ANON_GROUP_K] = {} if egroup is not None and anon_egroup is None: anon_egroup = self.utils.random_str(len(egroup)) self.gmap_attr_val[ANON_GROUP_K][egroup] = anon_egroup # reconstruct the fairshare info by combining euser and egroup out = [anon_euser] if anon_egroup is not None: out[0] += ":" + anon_egroup # and appending the rest of the original line out.append(_d[1]) if len(_d) > 1: p = _d[2].strip() if (ANON_USER_K in self.gmap_attr_val and p in self.gmap_attr_val[ANON_USER_K]): out.append(self.gmap_attr_val[ANON_USER_K][p]) else: out.append(_d[2]) if len(_d) > 2: out += _d[3:] anon_rg.append(" ".join(out)) return anon_rg def anonymize_resource_def(self, resources): """ Anonymize the resource definition """ if not self.resc_key: return resources for curr_anon_resc, val in self.resc_key.items(): if curr_anon_resc in resources: tmp_resc = copy.copy(resources[curr_anon_resc]) del resources[curr_anon_resc] if val is None: if curr_anon_resc in self.gmap_resc_key: val = self.gmap_resc_key[curr_anon_resc] else: val = self.utils.random_str(len(curr_anon_resc)) elif curr_anon_resc not in self.gmap_resc_key: self.gmap_resc_key[curr_anon_resc] = val tmp_resc.set_name(val) resources[val] = tmp_resc return resources def __anonymize_fgc(self, d, attr, ar, val): """ Anonymize an FGC limit value """ m = {"u": "euser", "g": "egroup", "p": "project"} if "," in val: fgc_lim = val.split(",") else: fgc_lim = [val] nfgc = [] for lim in fgc_lim: _fgc = PbsTypeFGCLimit(attr, lim) ename = _fgc.entity_name if ename in ("PBS_GENERIC", "PBS_ALL"): nfgc.append(lim) continue obf_ename = ename for etype, nm in m.items(): if _fgc.entity_type == etype: if nm not in self.gmap_attr_val: if nm in ar and ename in ar[nm]: obf_ename = ar[nm][ename] else: obf_ename = self.utils.random_str(len(ename)) self.gmap_attr_val[nm] = {ename: obf_ename} elif ename in self.gmap_attr_val[nm]: if ename in self.gmap_attr_val[nm]: obf_ename = self.gmap_attr_val[nm][ename] break _fgc.entity_name = obf_ename nfgc.append(_fgc.__val__()) d[attr] = ",".join(nfgc) def __anonymize_attr_val(self, d, attr, ar, name, val): """ Obfuscate an attribute/resource values """ # don't obfuscate default project if attr == "project" and val == "_pbs_project_default": return nstr = [] if "." 
in attr: m = self.gmap_resc_val else: m = self.gmap_attr_val if val in ar[name]: nstr.append(ar[name][val]) if name in self.lmap: self.lmap[name][val] = ar[name][val] else: self.lmap[name] = {val: ar[name][val]} if name not in m: m[name] = {val: ar[name][val]} elif val not in m[name]: m[name][val] = ar[name][val] else: # Obfuscate by randomizing with a value of the same length tmp_v = val.split(",") for v in tmp_v: if v in ar[name]: r = ar[name][v] elif name in m and v in m[name]: r = m[name][v] else: r = self.utils.random_str(len(v)) if not isinstance(ar[name], dict): ar[name] = {} ar[name][v] = r self.lmap[name] = {v: r} if name not in m: m[name] = {v: r} elif v not in m[name]: m[name][v] = r nstr.append(r) if d is not None: d[attr] = ",".join(nstr) def __anonymize_attr_key(self, d, attr, ar, name, res): """ Obfuscate an attribute/resource key """ if res is not None: m = self.gmap_resc_key else: m = self.gmap_attr_key if not ar[name]: if name in m: ar[name] = m[name] else: randstr = self.utils.random_str(len(name)) ar[name] = randstr m[name] = randstr if d is not None: tmp_val = d[attr] del d[attr] if res is not None: d[res + "." + ar[name]] = tmp_val else: d[ar[name]] = tmp_val if name not in self.lmap: self.lmap[name] = ar[name] if name not in m: m[name] = ar[name] def anonymize_batch_status(self, data=None): """ Anonymize arbitrary batch_status data :param data: Batch status data :type data: List or dictionary """ if not isinstance(data, (list, dict)): self.logger.error("data expected to be dict or list") return None if isinstance(data, dict): dat = [data] else: dat = data # Local mapping data used to store obfuscation mapping data for this # specific item, d self.lmap = {} # loop over each "batch_status" entry to obfuscate for d in dat: if self.attr_delete is not None: for todel in self.attr_delete: if todel in d: del d[todel] if self.resc_delete is not None: for todel in self.resc_delete: for tmpk in d.keys(): if "." in tmpk and todel == tmpk.split(".")[1]: del d[tmpk] # Loop over each object's attributes, this is where the special # cases are handled (e.g., FGC limits, formula, select spec...) for attr in d: val = d[attr] if "." in attr: (res_type, res_name) = attr.split(".") else: res_type = None res_name = attr if res_type is not None: if self._entity and (attr.startswith("max_run") or attr.startswith("max_queued")): self.__anonymize_fgc(d, attr, self.attr_val, val) if res_name in self.resc_val: if (attr.startswith("max_run") or attr.startswith("max_queued")): self.__anonymize_fgc(d, attr, self.attr_val, val) self.__anonymize_attr_val(d, attr, self.resc_val, res_name, val) if res_name in self.resc_key: self.__anonymize_attr_key(d, attr, self.resc_key, res_name, res_type) else: if attr in self.attr_val: self.__anonymize_attr_val(d, attr, self.attr_val, attr, val) if attr in self.attr_key: self.__anonymize_attr_key(d, attr, self.attr_key, attr, None) if ((attr in ("job_sort_formula", "schedselect", "select")) and self.resc_key): for r in self.resc_key: if r in val: if r not in self.gmap_resc_key: self.gmap_resc_key[ r] = self.utils.random_str(len(r)) val = val.replace(r, self.gmap_resc_key[r]) setattr(self, attr, val) d[attr] = val @staticmethod def __verify_key(line, key): """ Verify that a given key is actually a key in the context of the line given. 
:param line: the line to check in :type line: String :param key: the key to find :type key: String :returns a tuple of (key index, 1st character of key's value) :returns None if the key is invalid """ line_len = len(line) key_len = len(key) key_index = line.find(key, 0, line_len) line_nospaces = "".join(line.split()) len_nospaces = len(line_nospaces) key_idx_nospaces = line_nospaces.find(key, 0, len_nospaces) value_char = None # Find all instances of the string representing key in the line # Find the instance which is a valid key while key_index >= 0 and key_index < line_len: valid_key = True # Make sure that the characters before & after are not alpanum if key_index != 0: index_before = key_index - 1 char_before = line[index_before] if char_before.isalnum() is True: valid_key = False else: char_before = None if valid_key is True: if key_index < line_len: index_after = key_index + key_len char_after = line[index_after] if char_after.isalnum() is True: valid_key = False else: char_after = None if valid_key is True: # if 'char_after' is not "=", then the characters before # and after should be the delimiter, and be equal if char_before is not None and char_after is not None: if char_after != "=": if char_before != char_after: valid_key = False if valid_key is True: # Now, let's look at the whitespace stripped line index_after = key_idx_nospaces + key_len if index_after >= len_nospaces: # Nothing after the key, can't be a key valid_key = False else: # Find a valid operator after the key # valid operators: =, +=, -=, == if line_nospaces[index_after] != "=": # Check for this case: "key +=/-=/== value" if line_nospaces[index_after] in ("+", "-"): index_after = index_after + 1 if line_nospaces[index_after] != "=": valid_key = False else: valid_key = False if valid_key is True: val_idx_nospaces = index_after + 1 if val_idx_nospaces >= len_nospaces: # There's no value!, can't be a valid key valid_key = False if valid_key is False: # Find the next instance of the key key_index = line.find(key, key_index + len(key), line_len) key_idx_nospaces = line_nospaces.find(key, key_idx_nospaces + len(key), len_nospaces) else: # Seems like a valid key! 
# Break out of the loop value_char = line_nospaces[val_idx_nospaces] break if key_index == -1 or key_idx_nospaces == -1: return None return (key_index, value_char) def __get_value(self, line, key): """ Get the 'value' of a kv pair for the key given, from the line given :param line: the line to search in :type line: String :param key: the key for the value :type key: String :returns: String containing the value or None """ # Check if the line is of type: # <attribute name> = <value> line_list_spaces = line.split() if line_list_spaces is not None: first_word = line_list_spaces[0] if key == first_word: # Check that this word is followed by an '=' sign equals_sign = line_list_spaces[1] if equals_sign == "=": # Ok, we are going to assume that this is enough to # determine that this is the correct type # return everything after the '=" as value val_index = line.index("=") + 1 value = line[val_index:].strip() return value # Check that a valid instance of this key exists in the string kv = self.__verify_key(line, key) if kv is None: return None key_index, val_char = kv # Assumption: the character before the key is the delimiter # for the k-v pair delimiter = line[key_index - 1] if delimiter is None: # Hard luck, now there's no way to know, let's just assume # that space is the delimiter and hope for the best delimiter = " " # Determine the value's start index index_after_key = key_index + len(key) value_index = line[index_after_key:].find(val_char) + index_after_key # Get the value lexer = shlex.shlex(line[value_index:], posix=True) lexer.whitespace = delimiter lexer.whitespace_split = True try: value = lexer.get_token() except ValueError: # Sometimes, the data can be incoherent with things like # Unclosed quotes, which makes get_token() throw an exception # Just return None return None # Strip the value of any trailing whitespaces (like newlines) value = value.rstrip() return value @staticmethod def __delete_kv(line, key, value): """ Delete a key-value pair from a line If after deleting the k-v pair, the left over string has no alphanumeric characters, then delete the line :param line: the line in question :type line: String :param key: the key ofo the kv pair :type key: String :param value: the value of the kv pair :type value: String :returns: the line without the kv pair :returns: None if the line should be deleted """ key_index = line.find(key) index_after_key = key_index + len(key) line_afterkey = line[index_after_key:] value_index = line_afterkey.find(value) + index_after_key # find the index of the last character of value end_index = value_index + len(value) # Find the start index of the kv pair # Also include the character before the key # This will remove an extra delimiter that would be # left after the kv pair is deleted start_index = key_index - 1 if start_index < 0: start_index = 0 # Remove the kv pair line = line[:start_index] + line[end_index:] # Check if there's any alphanumeric characters left in the line if re.search("[A-Za-z0-9]", line) is None: # Delete the whole line return None return line def __add_alias_attr(self, key, alias_key): """ Some attributes have aliases. 
Added alias for a given attribute to the global maps :param key: the original attribute :type key: str :param alias_key: the alias :type alias_key: str """ if key in self.attr_delete: self.attr_delete[alias_key] = self.attr_delete[key] if key in self.attr_key: self.attr_key[alias_key] = self.attr_key[key] if key in self.attr_val: self.attr_val[alias_key] = self.attr_val[key] if key in self.resc_delete: self.resc_delete[alias_key] = self.resc_delete[key] if key in self.resc_key: self.resc_key[alias_key] = self.resc_key[key] if key in self.resc_val: self.resc_val[alias_key] = self.resc_val[key] def anonymize_file_tabular(self, filename, extension=".anon", inplace=False): """ Anonymize pbs short format outputs (tabular form) (e.g - qstat, pbsnodes -aS) The 'titles' of various columns are used to look up keys inside the global attribute maps and they are anonymized/removed accordingly. Warning: only works work PBS tabular outputs, not generic. :param filename: Name of the file to anonymize :type filename: str :param delim: delimiter for the table :type delim: str :param extension: Extension of the anonymized file :type extension: str :param inplace: If true returns the original file name for which contents have been replaced :type inplace: bool :returns: a str object containing filename of the anonymized file """ fn = self.du.create_temp_file() # qstat outputs sometimes have different names for some attributes self.__add_alias_attr(ATTR_euser, "User") self.__add_alias_attr(ATTR_euser, "Username") self.__add_alias_attr(ATTR_name, "Jobname") self.__add_alias_attr(ATTR_name, "Name") # pbsnodes -aS output has a 'host' field which should be anonymized self.__add_alias_attr(ATTR_NODE_Host, "host") header = None with open(filename) as f, open(fn, "w") as nf: # Get the header and the line with '-'s # Also write out the header and dash lines to the output file line_num = 0 for line in f: nf.write(line) line_num += 1 line_strip = line.strip() if len(line_strip) == 0: continue if line_strip[0].isalpha(): header = line continue # Dash line is the line after header if header is not None: dash_line = line break if header is None: # Couldn't find the header # Remove the aliases return filename # The dash line tells us the length of each column dash_list = dash_line.split() col_length = {} # Store each column's length col_index = 0 for item in dash_list: col_len = len(item) col_length[col_index] = col_len col_index += 1 # Find out the columns to anonymize/delete del_columns = [] anon_columns = {} start_index = 0 end_index = 0 for col_index, length in enumerate(col_length): start_index = end_index end_index = start_index + length + 1 # Get the column's title title = header[start_index:end_index] title = title.strip() if title in self.attr_delete.keys(): # Need to delete this whole column del_columns.append(col_index) elif title in self.attr_val.keys(): # Need to anonymize all values in the column anon_columns[col_index] = title anon_col_keys = anon_columns.keys() # Go through the file and anonymize/delete columns for line in f: start_index = 0 end_index = 0 # Iterate over the different fields col_index = 0 for col_index in range(len(col_length)): length = col_length[col_index] start_index = end_index end_index = start_index + length if col_index in del_columns: # Need to delete the value of this column # Just replace the value by blank spaces line2 = list(line) for i in range(len(line2)): if i >= start_index and i < end_index: line2[i] = " " line = "".join(line2) elif col_index in anon_col_keys: # Need to 
anonymize this column's value # Get the value value = line[start_index:end_index] value_strip = value.strip() anon_val = self.__get_anon_value( anon_columns[col_index], value_strip, self.gmap_attr_val) line = line.replace(value_strip, anon_val) nf.write(line) if inplace: out_filename = filename else: out_filename = filename + extension os.rename(fn, out_filename) return out_filename def anonymize_file_kv(self, filename, extension=".anon", inplace=False): """ Anonymize a file which has data in the form of key-value pairs. Replace every occurrence of any entry in the global map for the given file by its anonymized values. :param filename: Name of the file to anonymize :type filename: str :param extension: Extension of the anonymized file :type extension: str :param inplace: If true returns the original file name for which contents have been replaced :type inplace: bool :returns: a str object containing filename of the anonymized file """ fn = self.du.create_temp_file() with open(filename) as f, open(fn, "w") as nf: delete_line = False for line in f: # Check if this is a line extension for an attr being deleted if delete_line is True and line[0] == "\t": continue delete_line = False # Check if any of the attributes to delete are in the line for key in self.attr_delete.keys(): if key in line: value = self.__get_value(line, key) if value is None: continue # Delete the key-value pair line = self.__delete_kv(line, key, value) if line is None: delete_line = True break if delete_line is True: continue # Anonymize key-value pairs for key in self.attr_key.keys(): if key in line: if self.__verify_key(line, key) is None: continue anon_key = self.__get_anon_key(key, self.gmap_attr_key) line = line.replace(key, anon_key) for key in self.resc_key.keys(): if key in line: if self.__verify_key(line, key) is None: continue anon_key = self.__get_anon_key(key, self.gmap_resc_key) line = line.replace(key, anon_key) for key in self.attr_val.keys(): if key in line: value = self.__get_value(line, key) if value is None: continue anon_value = self.__get_anon_value(key, value, self.gmap_attr_val) line = line.replace(value, anon_value) for key in self.resc_val.keys(): if key in line: value = self.__get_value(line, key) if value is None: continue anon_value = self.__get_anon_value(key, value, self.gmap_resc_val) line = line.replace(value, anon_value) # Anonymize IP addresses pattern = re.compile( "\b*\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b*") match_obj = re.search(pattern, line) if match_obj: ip = match_obj.group(0) anon_key = self.__get_anon_key(ip, self.gmap_attr_key) line = line.replace(ip, anon_key) nf.write(line) if inplace: out_filename = filename else: out_filename = filename + extension os.rename(fn, out_filename) return out_filename def anonymize_accounting_log(self, logfile): """ Anonymize the accounting log :param logfile: Acconting log file :type logfile: str """ try: f = open(logfile) except IOError: self.logger.error("Error processing " + logfile) return None self.__add_alias_attr(ATTR_euser, "user") self.__add_alias_attr(ATTR_euser, "requestor") self.__add_alias_attr(ATTR_egroup, "group") self.__add_alias_attr(ATTR_A, "account") anon_data = [] for data in f: # accounting log format is # %Y/%m/%d %H:%M:%S;<Key>;<Id>;<key1=val1> <key2=val2> ... 
curr = data.split(";", 3) if curr[1] in ("A", "L"): anon_data.append(data.strip()) continue buf = shlex.split(curr[3].strip()) skip_record = False # Split the attribute list into key value pairs kvl_list = map(lambda n: n.split("=", 1), buf) for kvl in kvl_list: try: k, v = kvl except ValueError: self.num_bad_acct_records += 1 self.logger.debug("Bad accounting record found:\n" + data) skip_record = True break if k in self.attr_val: anon_kv = self.__get_anon_value(k, v, self.gmap_attr_val) kvl[1] = anon_kv if k in self.attr_key: anon_ak = self.__get_anon_key(k, self.gmap_attr_key) kvl[0] = anon_ak if "." in k: restype, resname = k.split(".") for rv in self.resc_val: if resname == rv: anon_rv = self.__get_anon_value( resname, rv, self.gmap_resc_val) kvl[1] = anon_rv if resname in self.resc_key: anon_rk = self.__get_anon_key(resname, self.gmap_resc_key) kvl[0] = restype + "." + anon_rk if not skip_record: anon_data.append(";".join(curr[:3]) + ";" + " ".join(["=".join(n) for n in kvl_list])) f.close() return anon_data def anonymize_sched_config(self, scheduler): """ Anonymize the scheduler config :param scheduler: PBS scheduler object """ if len(self.resc_key) == 0: return # when anonymizing we get rid of the comments as they may contain # sensitive information scheduler._sched_config_comments = {} # If resources need to be anonymized then update the resources line # job_sort_key and node_sort_key sr = scheduler.get_resources() if sr: for i, sres in enumerate(sr): if sres in self.resc_key: if sres in self.gmap_resc_key: sr[i] = self.gmap_resc_key[sres] else: anon_res = self.utils.random_str(len(sres)) self.gmap_resc_key[sres] = anon_res sr[i] = anon_res scheduler.sched_config["resources"] = ",".join(sr) for k in ["job_sort_key", "node_sort_key"]: if k in scheduler.sched_config: sc_jsk = scheduler.sched_config[k] if not isinstance(sc_jsk, list): sc_jsk = list(sc_jsk) for r in self.resc_key: for i, key in enumerate(sc_jsk): if r in key: sc_jsk[i] = key.replace(r, self.resc_key[r]) def __str__(self): return ("Attributes Values: " + str(self.gmap_attr_val) + "\n" + "Resources Values: " + str(self.gmap_resc_val) + "\n" + "Attributes Keys: " + str(self.gmap_attr_key) + "\n" + "Resources Keys: " + str(self.gmap_resc_key))
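# Usage sketch for PBSAnonymizer above: obfuscate the values of euser/egroup
# and of a custom resource 'foo' in one batch_status entry.  The record and
# the resource name are examples only.  Because the mappings are cached in
# gmap_attr_val/gmap_resc_val, a given input value always maps to the same
# obfuscated string across calls.
def _anonymizer_demo():
    anon = PBSAnonymizer(attr_val=['euser', 'egroup'], resc_val=['foo'])
    record = [{'euser': 'alice', 'egroup': 'users',
               'Resource_List.foo': '4', 'job_state': 'R'}]
    anon.anonymize_batch_status(record)
    return record   # euser/egroup/Resource_List.foo values now randomized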