def adjust_job_start_end(job):
    """ Set the job node start and end times based on the presence of the
        special job-X-begin and job-X-end archives. Do nothing if these
        archives are absent
    """
    startarchive = "job-{0}-begin".format(job.job_id)
    endarchive = "job-{0}-end".format(job.job_id)

    for nodename, filepaths in job.rawarchives():
        begin = None
        end = None
        for fname in filepaths:
            filename = os.path.basename(fname)
            if filename.startswith(startarchive):
                context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, fname)
                mdata = context.pmGetArchiveLabel()
                begin = datetime.datetime.utcfromtimestamp(math.floor(mdata.start))
            if filename.startswith(endarchive):
                context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, fname)
                end = datetime.datetime.utcfromtimestamp(math.ceil(context.pmGetArchiveEnd()))
        job.setnodebeginend(nodename, begin, end)
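# The label/end boilerplate above recurs throughout these snippets; here is a
# minimal standalone sketch of it. The archive path is a hypothetical
# placeholder, and the imports shown are the ones the snippets assume.
import datetime
import math

from pcp import pmapi
import cpmapi as c_pmapi

ARCHIVE = "/var/log/pcp/pmlogger/node01/20240101.00.10"  # placeholder path

context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, ARCHIVE)
label = context.pmGetArchiveLabel()

# label.start holds the timestamp of the first record; pmGetArchiveEnd()
# scans to the timestamp of the last record in the archive
begin = datetime.datetime.utcfromtimestamp(math.floor(label.start))
end = datetime.datetime.utcfromtimestamp(math.ceil(context.pmGetArchiveEnd()))
print(begin, end)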
def __init__(self, pcp_fname, start=None, finish=None, interval=None):
    '''Opens a PCP archive and does an initial walk of the PMNS tree.
    start, finish and interval, when given, are in seconds and are
    converted to pmapi.timeval'''
    self.pcparchive = pcp_fname
    try:
        self.ctx = pmapi.pmContext(c_api.PM_CONTEXT_ARCHIVE, pcp_fname)
    except pmapi.pmErr as e:
        print("Error: {0}".format(e))
        sys.exit(-1)

    self.ctx.pmTraversePMNS('', self._pmns_callback)
    label = self.ctx.pmGetArchiveLabel()

    self.start = None
    self.finish = None
    self.interval = None
    if start:
        self.start = pmapi.timeval(sec=start)
    else:
        self.start = label.start
    if finish:
        self.finish = pmapi.timeval(sec=finish)
    else:
        self.finish = self.ctx.pmGetArchiveEnd()
    if interval:
        self.interval = pmapi.timeval(sec=interval)
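# A sketch of the window-defaulting behaviour above in isolation: when start
# or finish is omitted, the archive bounds are used instead. The archive path
# is a placeholder.
ctx = pmapi.pmContext(c_api.PM_CONTEXT_ARCHIVE, "/tmp/archives/20240101")
label = ctx.pmGetArchiveLabel()
window_start = label.start             # timeval of the first archive record
window_finish = ctx.pmGetArchiveEnd()  # timeval of the last archive record
step = pmapi.timeval(sec=30)           # optional sampling interval, in seconds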
def processarchive(self, archive):
    """ Try to open the pcp archive and extract the timestamps of the first
        and last records and hostname. Store this in the DbArchiveCache
    """
    try:
        context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, archive)
        mdata = context.pmGetArchiveLabel()
        hostname = mdata.hostname

        if self.hostname_mode == "fqdn":
            # The fully qualified domain name uniquely identifies the host.
            # Ensure to add it if it is missing
            if self.hostnameext != "" and (not hostname.endswith(self.hostnameext)):
                hostname += "." + self.hostnameext
        elif self.hostname_mode == "hostname":
            # The full domain name is ignored and only the hostname part
            # matters to uniquely identify a node
            hostname = mdata.hostname.split(".")[0]

        jobid = self.parsejobid(archive)

        # archive[:-6] drops a fixed 6-character suffix (presumably ".index")
        # to store the archive base name
        self.dbac.insert(self.resource_id, hostname, archive[:-6],
                         float(mdata.start), float(context.pmGetArchiveEnd()),
                         jobid)

        logging.debug("processed archive %s", archive)

    except pmapi.pmErr as exc:
        logging.error("archive %s. %s", archive, exc.message())
def __init__(self, archivelist, opts=None):
    if opts is None:
        opts = {}  # guard: the default of None is not subscriptable below
    self.node_archives = archivelist
    self.jobdir = os.path.dirname(archivelist[0])
    self.job_id = opts['job_id'] if 'job_id' in opts else "1"
    self.end_str = "end"
    self.walltime = 9751
    self.nodecount = len(archivelist)
    self.acct = {
        "end_time": 12312,
        "id": 1,
        "uid": opts['acct_uid'] if 'acct_uid' in opts else "sdf",
        "user": "******",
        "partition": "test",
        "local_job_id": "1234",
        "resource_manager": "slurm"
    }
    self.nodes = [os.path.basename(x) for x in archivelist]
    self._data = {}
    self._errors = []

    archive_starts = []
    archive_ends = []
    for archive in archivelist:
        context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, archive)
        mdata = context.pmGetArchiveLabel()
        archive_starts.append(datetime.datetime.utcfromtimestamp(math.floor(mdata.start)))
        archive_ends.append(datetime.datetime.utcfromtimestamp(math.ceil(context.pmGetArchiveEnd())))

    self.start_datetime = min(archive_starts)
    self.end_datetime = max(archive_ends)
def extract_and_merge_logs(job, conf, resconf):
    """ merge all of the raw pcp archives into one archive per node
        for each node in the job """
    for nodename, filepaths in job.rawarchives():
        for fname in filepaths:
            context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, fname)
            process_slurm_metadata(context, job, nodename)

    return pmlogextract(job, conf, resconf)
def adjust_job_start_end(job):
    """ Set the job node start and end times based on the presence of the
        special job-X-begin and job-X-end archives. Do nothing if these
        archives are absent.

        Note that the job start and end archives are keyed on the
        local_job_id, which may not be globally unique: for example, a job
        may be requeued on the same compute node with the same local_job_id.
        To exclude the job start/end archives from other runs of the job,
        the adjustment is only performed if the start/end archives are
        within 30 seconds of the accounting times.
    """
    startarchive = "job-{0}-begin".format(job.job_id)
    endarchive = "job-{0}-end".format(job.job_id)

    for nodename, filepaths in job.rawarchives():
        begin = None
        end = None
        for fname in filepaths:
            try:
                filename = os.path.basename(fname)
                if filename.startswith(startarchive):
                    context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, fname)
                    mdata = context.pmGetArchiveLabel()
                    archive_begin = datetime.datetime.utcfromtimestamp(math.floor(mdata.start))
                    start_delta = archive_begin - job.start_datetime
                    if abs(start_delta.total_seconds()) <= 30:
                        begin = archive_begin
                if filename.startswith(endarchive):
                    context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, fname)
                    archive_end = datetime.datetime.utcfromtimestamp(math.ceil(context.pmGetArchiveEnd()))
                    end_delta = archive_end - job.end_datetime
                    if abs(end_delta.total_seconds()) <= 30:
                        end = archive_end
            except pmapi.pmErr as exp:
                logging.warning('PCP archive %s', exp)
                job.mark_bad_rawarchive(nodename, fname, str(exp))

        job.setnodebeginend(nodename, begin, end)
def __init__(self, pcp_fname, start=None, end=None):
    '''Opens a PCP archive and does an initial walk of the PMNS tree'''
    self.pcparchive = pcp_fname
    self.context = pmapi.pmContext(c_api.PM_CONTEXT_ARCHIVE, pcp_fname)
    self.context.pmTraversePMNS('', self._pmns_callback)
    self.start = start
    self.end = end
    tmp = self.context.pmGetArchiveLabel()
    self.start_time = tmp.start
    self.end_time = self.context.pmGetArchiveEnd()
def __init__(self):
    try:
        self.context = pmapi.pmContext(target='local:')
    except pmapi.pmErr:
        # no local pmcd available; leave the help text empty
        return
    self.context.pmTraversePMNS('', self._pmns_callback)
    for metric in self.pmns:
        try:
            pmid = self.context.pmLookupName(metric)
            text = self.context.pmLookupText(pmid[0], kind=c_api.PM_TEXT_HELP)
            self.help_text[metric] = text
        except pmapi.pmErr:
            # metric has no help text; skip it
            pass
def __init__(self):
    try:
        self.ctx = pmapi.pmContext(target='local:')
    except Exception:
        print("Unable to contact local pmcd. Help text will be missing")
        return
    self.ctx.pmTraversePMNS('', self._pmns_callback)
    for metric in self.pmns:
        try:
            pmid = self.ctx.pmLookupName(metric)
            text = self.ctx.pmLookupText(pmid[0], kind=c_api.PM_TEXT_HELP)
            self.help_text[metric] = text
        except Exception:
            pass
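# The lookup above can be exercised directly; a sketch assuming a running
# local pmcd and one well-known metric name.
from pcp import pmapi
import cpmapi as c_api

ctx = pmapi.pmContext(target='local:')
pmids = ctx.pmLookupName('kernel.all.load')  # accepts a name or a list of names
print(ctx.pmLookupText(pmids[0], kind=c_api.PM_TEXT_HELP))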
def processarchive(self, nodename, nodeidx, archive):
    """ process the archive """
    # TODO need to benchmark code to see if there is a benefit to
    # interleaving the calls to pmFetch for the different contexts. This
    # version runs all the pmFetches for each analytic in turn.
    context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, archive)
    mdata = ArchiveMeta(nodename, nodeidx, context.pmGetArchiveLabel())

    for preproc in self.preprocs:
        context.pmSetMode(c_pmapi.PM_MODE_FORW, mdata.archive.start, 0)
        self.processforpreproc(context, mdata, preproc)

    for analytic in self.alltimestamps:
        context.pmSetMode(c_pmapi.PM_MODE_FORW, mdata.archive.start, 0)
        self.processforanalytic(context, mdata, analytic)

    for analytic in self.firstlast:
        context.pmSetMode(c_pmapi.PM_MODE_FORW, mdata.archive.start, 0)
        self.processfirstlast(context, mdata, analytic)
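# Each process* helper above ultimately drives pmFetch forward from the
# rewound position. A minimal sketch of that replay loop, assuming an open
# archive context and a metric present in the archive; pmFetch raises pmErr
# (PM_ERR_EOL) once the log is exhausted.
def walk_archive(context, metric="kernel.all.load"):
    """Visit every record for one metric, oldest to newest (sketch)."""
    label = context.pmGetArchiveLabel()
    context.pmSetMode(c_pmapi.PM_MODE_FORW, label.start, 0)
    pmids = context.pmLookupName(metric)
    while True:
        try:
            result = context.pmFetch(pmids)
        except pmapi.pmErr:
            break  # end of log
        # result.contents.timestamp is the record time; values are decoded
        # with pmExtractValue as elsewhere in these snippets
        context.pmFreeResult(result)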
def setUpClass(cls):
    print("\nCreating test iSCSI target using iscsi_config.json\n")
    ISCSITests.target = ISCSITarget()

    pminfo = PMInfo()
    pminfo.lun = {}

    ctx = pmapi.pmContext()

    # First look at some sample summary stats
    pmids = ctx.pmLookupName(summary_metrics)
    descs = ctx.pmLookupDescs(pmids)
    results = ctx.pmFetch(pmids)
    for i in range(results.contents.numpmid):
        # valfmt is stored per metric (per value set), so index it with i
        atom = ctx.pmExtractValue(results.contents.get_valfmt(i),
                                  results.contents.get_vlist(i, 0),
                                  descs[i].contents.type,
                                  c_api.PM_TYPE_U32)
        field_name = summary_metrics[i].split('.')[-1]
        setattr(pminfo, field_name, atom.ul)

    # Now look at the lun stats
    pmids = ctx.pmLookupName("lio.lun.iops")
    descs = ctx.pmLookupDescs(pmids)
    results = ctx.pmFetch(pmids)
    devices = ctx.pmGetInDom(descs[0])[1]
    for i in range(results.contents.get_numval(0)):
        dev_name = devices[i]
        iops = ctx.pmExtractValue(results.contents.get_valfmt(0),
                                  results.contents.get_vlist(0, i),
                                  descs[0].contents.type,
                                  c_api.PM_TYPE_U32)
        pminfo.lun[dev_name] = iops.ul

    pminfo.lun_list = list(pminfo.lun.keys())
    ISCSITests.pminfo = pminfo
def processarchive(self, nodename, nodeidx, archive):
    """ process the archive """
    context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, archive)
    mdata = ArchiveMeta(nodename, nodeidx, context.pmGetArchiveLabel())
    context.pmSetMode(c_pmapi.PM_MODE_FORW, mdata.archive.start, 0)

    # TODO need to benchmark code to see if there is a benefit to
    # interleaving the calls to pmFetch for the different contexts. This
    # version runs all the pmFetches for each analytic in turn.
    basecontext = context.ctx

    for preproc in self.preprocs:
        # give each preproc a fresh duplicate of the base context so that it
        # starts fetching from the beginning of the archive
        context._ctx = basecontext
        newctx = context.pmDupContext()
        context._ctx = newctx
        self.processforpreproc(context, mdata, preproc)
        context.__del__()  # destroy the duplicate context

    for analytic in self.alltimestamps:
        context._ctx = basecontext
        newctx = context.pmDupContext()
        context._ctx = newctx
        self.processforanalytic(context, mdata, analytic)
        context.__del__()

    for analytic in self.firstlast:
        context._ctx = basecontext
        newctx = context.pmDupContext()
        context._ctx = newctx
        self.processfirstlast(context, mdata, analytic)
        context.__del__()

    # restore the original handle before the wrapper object is collected
    context._ctx = basecontext
    del context
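# The save/dup/restore dance above could be factored into a helper so each
# analytic gets an independent fetch cursor. This is only a sketch built on
# the same pmDupContext/_ctx manipulation used above (a private attribute of
# pmapi.pmContext, not a documented API).
import contextlib

@contextlib.contextmanager
def duplicated_context(context, basectx):
    """Temporarily point `context` at a fresh duplicate of `basectx`."""
    context._ctx = basectx
    context._ctx = context.pmDupContext()  # independent position in the archive
    try:
        yield context
    finally:
        context.__del__()       # destroy the duplicate context
        context._ctx = basectx  # restore the original handle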
def processarchive(self, archive, fast_index, host_from_path=None):
    """ Try to open the pcp archive and extract the timestamps of the first
        and last records and hostname. Store this in the DbArchiveCache
    """
    start_timestamp = None
    if fast_index:
        start_timestamp = self.get_archive_data_fast(archive)

    if start_timestamp is not None:
        hostname = host_from_path
        end_timestamp = start_timestamp
    else:
        # fallback implementation that opens the archive
        try:
            context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, archive)
            mdata = context.pmGetArchiveLabel()
            hostname = mdata.hostname
            start_timestamp = float(mdata.start)
            end_timestamp = float(context.pmGetArchiveEnd())
        except pmapi.pmErr as exc:  # pylint: disable=not-callable
            logging.error("archive %s. %s", archive, exc.message())
            return None

    if self.hostname_mode == "fqdn":
        # The fully qualified domain name uniquely identifies the host.
        # Ensure to add it if it is missing
        if self.hostnameext != "" and (not hostname.endswith(self.hostnameext)):
            hostname += "." + self.hostnameext
    elif self.hostname_mode == "hostname":
        # The full domain name is ignored and only the hostname part matters
        # to uniquely identify a node
        hostname = hostname.split(".")[0]

    jobid = self.parsejobid(archive)

    return hostname, archive[:-6], start_timestamp, end_timestamp, jobid
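# A sketch of consuming the returned tuple; `indexer` and the path are
# hypothetical stand-ins for the enclosing class and a real archive.
entry = indexer.processarchive("/archives/node01/20240101.index",
                               fast_index=False)
if entry is not None:
    hostname, archive_base, start_ts, end_ts, jobid = entry
    print(hostname, jobid, end_ts - start_ts)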
def __init__(self, archivelist):
    self.node_archives = archivelist
    self.jobdir = os.path.dirname(archivelist[0])
    self.job_id = "1"
    self.end_str = "end"
    self.walltime = 9751
    self.nodecount = len(archivelist)
    self.acct = {"end_time": 12312, "id": 1, "uid": "sdf", "user": "******"}
    self.nodes = ["node" + str(i) for i in range(len(archivelist))]
    self._data = {}

    archive_starts = []
    archive_ends = []
    for archive in archivelist:
        context = pmapi.pmContext(c_pmapi.PM_CONTEXT_ARCHIVE, archive)
        mdata = context.pmGetArchiveLabel()
        archive_starts.append(datetime.datetime.utcfromtimestamp(math.floor(mdata.start)))
        archive_ends.append(datetime.datetime.utcfromtimestamp(math.ceil(context.pmGetArchiveEnd())))

    self.start_datetime = min(archive_starts)
    self.end_datetime = max(archive_ends)
def connect(self):
    """ Establish a PMAPI context to archive, host or local, via args """
    self.context = pmapi.pmContext()
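# pmapi.pmContext() with no arguments defaults to a live pmcd connection on
# the local host. The other context types used across these snippets are
# selected explicitly; host and archive names below are placeholders.
from pcp import pmapi
import cpmapi as c_api

live = pmapi.pmContext(target='local:')                                     # live pmcd
hist = pmapi.pmContext(c_api.PM_CONTEXT_ARCHIVE, '/tmp/archives/20240101')  # archive replay
solo = pmapi.pmContext(c_api.PM_CONTEXT_LOCAL, 'localhost')                 # no-daemon access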
        ly = y + halfDiskSize
        lw = VSPACE + 2
        lh = thickness
    else:
        # moved down
        lx = x + halfDiskSize
        ly = oldY + DISKSIZE - 1
        lw = thickness
        lh = VSPACE + 2
    print("_line %d %d %d %d" % (lx, ly, lw, lh))
    print("_led %d %d %d %d" % (x, y, DISKSIZE, DISKSIZE))
    print(' _metric disk.dev.total["%s"]' % (mapping.name()))
    print(" _legend diskLegend")
    print(" _actions diskActions")
    oldX = x
    oldY = y
    xStep = dir * (DISKSIZE + VSPACE)  # use VSPACE (tighter packing)
    x += xStep
    if x > maxX - DISKSIZE or x <= HSPACE:
        x -= xStep
        y += DISKSIZE + VSPACE
        dir = -dir

if __name__ == '__main__':
    context = pmapi.pmContext()
    machine = Machine(context)
    machine.inventory()
    # machine.details()
    machine.gadgetize()
    archive = input_file
    if not os.path.exists(input_file):
        return input_file + " does not exist"
    for line in open(input_file):
        if line[:8] == "Archive:":
            tokens = line[:-1].split()
            archive = os.path.join(os.path.dirname(input_file), tokens[2])
    try:
        pmc = pmapi.pmContext(c_api.PM_CONTEXT_ARCHIVE, archive)
    except pmapi.pmErr:
        return "Cannot open PCP archive: " + archive
else:
    if host == "":
        host = "local:"
    try:
        pmc = pmapi.pmContext(target=host)
    except pmapi.pmErr:
        return "Cannot connect to pmcd on " + host

if duration_arg != 0:
    try:
        (timeval, errmsg) = pmc.pmParseInterval(duration_arg)
    except pmapi.pmErr as e:
        # pmParseInterval raises pmErr on a bad interval string
        return str(e)
    duration = timeval.tv_sec

ss.setup_metrics(pmc)

if create_archive:
    configuration = "log mandatory on every " + \
        str(interval_arg) + " seconds { "
    configuration += ss.dump_metrics()
# register the subsystems to sample (list.extend is eager, unlike map in
# Python 3, which is lazy and would never run the appends)
if create_archive:
    subsys.extend((cpu, disk, net, interrupt, memory))
else:
    subsys.extend((cpu, disk, net))

if replay_archive:
    archive = input_file
    if not os.path.exists(input_file):
        print(input_file, "does not exist")
        sys.exit(1)
    for line in open(input_file):
        if line[:8] == "Archive:":
            tokens = line[:-1].split()
            archive = os.path.join(os.path.dirname(input_file), tokens[2])
    try:
        pm = pmapi.pmContext(c_api.PM_CONTEXT_ARCHIVE, archive)
    except pmapi.pmErr:
        print("Cannot open PCP archive: %s" % archive)
        sys.exit(1)
else:
    if host == "":
        host = "local:"
    try:
        pm = pmapi.pmContext(target=host)
    except pmapi.pmErr:
        print("Cannot connect to pmcd on " + host)
        sys.exit(1)

# Find server-side pmcd host-name
host = pm.pmGetContextHostName()
def main(stdscr_p):
    global stdscr
    stdscr = _StandardOutput(stdscr_p)
    output_file = ""
    input_file = ""
    sort = ""
    duration = 0
    interval_arg = 5
    duration_arg = 0
    n_samples = 0
    output_type = "g"
    host = ""
    create_archive = False
    replay_archive = False
    i = 1

    subsys_options = ("g", "m")

    class NextOption(Exception):
        pass

    while i < len(sys.argv):
        try:
            if sys.argv[i][:1] == "-":
                for ssx in subsys_options:
                    if sys.argv[i][1:] == ssx:
                        output_type = ssx
                        raise NextOption
                if sys.argv[i] == "-w":
                    i += 1
                    output_file = sys.argv[i]
                    create_archive = True
                elif sys.argv[i] == "-r":
                    i += 1
                    input_file = sys.argv[i]
                    replay_archive = True
                elif sys.argv[i] == "-L":
                    i += 1
                    stdscr.width = int(sys.argv[i])
                elif sys.argv[i] == "--help":
                    return usage()
                elif sys.argv[i] == "-h":
                    i += 1
                    host = sys.argv[i]
                else:
                    return sys.argv[0] + ": Unknown option " + sys.argv[i] \
                        + "\nTry `" + sys.argv[0] + " --help' for more information."
            else:
                interval_arg = int(sys.argv[i])
                i += 1
                if i < len(sys.argv):
                    n_samples = int(sys.argv[i])
            i += 1
        except NextOption:
            i += 1

    ss = Subsystem()
    ss.init_processor_metrics()
    ss.init_memory_metrics()
    ss.init_disk_metrics()
    ss.init_network_metrics()
    ss.init_process_metrics()

    cpu = _ProcessorPrint(ss, stdscr)
    mem = _MemoryPrint(ss, stdscr)
    disk = _DiskPrint(ss, stdscr)
    net = _NetPrint(ss, stdscr)
    proc = _ProcPrint(ss, stdscr)
    proc.output_type = output_type

    if replay_archive:
        archive = input_file
        if not os.path.exists(input_file):
            return input_file + " does not exist"
        for line in open(input_file):
            if line[:8] == "Archive:":
                tokens = line[:-1].split()
                archive = os.path.join(os.path.dirname(input_file), tokens[2])
        try:
            pmc = pmapi.pmContext(c_api.PM_CONTEXT_ARCHIVE, archive)
        except pmapi.pmErr:
            return "Cannot open PCP archive: " + archive