def main():
    """
    Main routine: Report how much 'sw' queue core time goes to small jobs

    Every command-line argument is loaded on its own through
    jobstats.alljobs. For jobs whose queue contains 'sw', cores*walltime is
    accumulated separately for jobs using fewer than 12 cores and for all
    other jobs. When both totals are non-zero the small-job fraction is
    printed next to the file name. After all files, the mean fraction and
    its standard error (std / sqrt(n)) are printed.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')

    swfracs = []
    for date in sys.argv[1:]:
        # One file at a time so each file yields its own fraction
        jobs = jobstats.alljobs([date])
        corehrslt = 0.0
        corehrsgt = 0.0
        for job in jobs:
            if 'sw' not in job.queue:
                continue
            if job.cores < 12:
                corehrslt += job.cores*job.walltime
            else:
                corehrsgt += job.cores*job.walltime
        # Files lacking either job class are left out of the statistics
        if corehrslt > 0 and corehrsgt > 0:
            frac = corehrslt/(corehrslt + corehrsgt)
            print(date + " " + str(frac))
            swfracs.append(frac)

    print("===Mean===")
    if not swfracs:
        print("No accounting file contained both small and large 'sw' jobs")
        return
    # Standard error of the mean: divide by sqrt(n), not n
    stderr = np.std(swfracs)/np.sqrt(len(swfracs))
    print(str(np.mean(swfracs)) + "+/-" + str(stderr))
    return
def main():
    """
    Main routine: Print stats for nodes whose jobs look unhealthy

    Jobs from queues containing 'debug', 'lmgpu', 'scalemp' or 'class' are
    ignored. Every remaining job is attached to each node it ran on. A node
    is reported when it has more than 20 jobs and either its average
    efficiency is below 0.1 or its bad-job fraction is above 0.9.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    accounting_files = sys.argv[1:]

    skipped_queues = ('debug', 'lmgpu', 'scalemp', 'class')
    per_node = {}
    for job in jobstats.alljobs(accounting_files):
        if any(tag in job.queue for tag in skipped_queues):
            continue
        for name in job.nodes:
            if name in per_node:
                per_node[name].joblist.append(job)
            else:
                # The first job seen on a node is handed to the constructor
                per_node[name] = jobstats.nodeClass(job, name)

    for record in per_node.values():
        if len(record.joblist) <= 20:
            continue
        if record.avgEfficiency() < 0.1 or record.fractionBad() > 0.9:
            record.printStats()
def main():
    """
    Main routine: Print ppn usage for users who request partial nodes

    Jobs are grouped per user. For every user (in sorted name order) that
    has at least one job with 1 < ppn < 12, the user name is printed
    followed by that user's top 'ppn' values.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')

    by_user = {}
    for job in jobstats.alljobs(sys.argv[1:]):
        if job.user not in by_user:
            # The first job of a user creates that user's record
            by_user[job.user] = jobstats.userClass(job)
        else:
            by_user[job.user].addJob(job)

    for name in sorted(by_user):
        record = by_user[name]
        if not any(1 < job.ppn < 12 for job in record.joblist):
            continue
        print(name + ':')
        record.printTopProp('ppn')
    return
def main():
    """
    Main routine: Print all jobs and queue-time statistics for one user

    argv[1] is the user name and the remaining arguments are accounting
    files. A job is selected when the given name occurs in job.user
    (substring match). All selected jobs are printed, followed by the mean
    queue time with its standard error, the maximum queue time (both
    converted with a factor of 3600.0 and labelled hours) and the number of
    jobs. Exits with a message when no job matches the user.
    """
    if len(sys.argv) < 3:
        sys.exit('Usage: ' + sys.argv[0] + ' username  [Accounting files]')
    username = sys.argv[1]
    joblist = sys.argv[2:]

    user = None
    qtimes = []
    for job in jobstats.alljobs(joblist):
        if username not in job.user:
            continue
        qtimes.append(job.tiq)
        if user is None:
            user = jobstats.userClass(job)
        else:
            user.addJob(job)

    # Without this guard the calls below fail on None / an empty list
    if user is None:
        sys.exit('No jobs found for user ' + username)

    user.printAllJobs()
    mean_hours = np.mean(qtimes)/3600.0
    err_hours = np.std(qtimes)/(np.sqrt(len(qtimes))*3600.0)
    print("mean qtime: (" + str(mean_hours) + " +/- " + str(err_hours)
          + ") hours")
    print("max qtime: " + str(max(qtimes)/3600.0) + " hours")
    print("number of jobs: " + str(len(user.joblist)))
    return
def main():
    """
    Main routine: Show information about jobs with negative exit codes

    While scanning the jobs, the id and efficiency of every job whose
    efficiency exceeds 10000.0 is printed. Jobs with exitcode < 0 are
    collected into a jobstats.jobGroup, for which overall stats and the top
    users, nodes, queues and exit codes are printed. Finally printSuperEff
    is called on the problem group and on the full job collection. When no
    job has a negative exit code the group sections are skipped instead of
    failing on a missing group.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    joblist = sys.argv[1:]

    probjobs = None
    jobs = jobstats.alljobs(joblist)
    for job in jobs:
        efficiency = job.efficiency()
        if efficiency > 10000.0:
            print(str(job.id) + " " + str(efficiency))
        if job.exitcode < 0:
            if probjobs is not None:
                probjobs.addJob(job)
            else:
                # The first problem job creates the group
                probjobs = jobstats.jobGroup(job)

    if probjobs is None:
        print("No jobs with negative exit codes found")
    else:
        probjobs.printStats()
        print("Top Users:")
        probjobs.printTopProp('user')
        print("Top Nodes:")
        probjobs.printTopProp('node')
        print("Top Queues:")
        probjobs.printTopProp('queue')
        print("Top Exit Codes:")
        probjobs.printTopProp('exitcode')

    print("Super efficient Jobs:")
    if probjobs is not None:
        probjobs.printSuperEff()
    jobs.printSuperEff()
    return
def _plot_queue_time_trend(xvals, yvals, xlabel, filename):
    """
    Save a log-binned hexbin of yvals against xvals to filename

    A dashed line spanning the range of yvals on both axes and a
    least-squares line are drawn on top. The line is fitted only to points
    where both coordinates exceed -1.0 and is skipped when fewer than two
    such points exist. Nothing is plotted when there is no data.
    """
    if not xvals:
        print('no data for ' + filename + '; plot skipped')
        return

    subset = [(x, y) for x, y in zip(xvals, yvals) if y > -1.0 and x > -1.0]

    plt.cla()
    plt.hexbin(xvals, yvals, bins='log')
    plt.plot([min(yvals), max(yvals)], [min(yvals), max(yvals)], 'k--')
    if len(subset) >= 2:
        subx = [x for x, _ in subset]
        suby = [y for _, y in subset]
        trend = np.poly1d(np.polyfit(subx, suby, 1))
        plt.plot(subx, trend(subx), 'k')
    plt.xlabel(xlabel)
    plt.ylabel('log(Queue Time (hours))')
    plt.savefig(filename)


def main():
    """
    Main routine: Plot queue time against requested resources

    Only jobs with a positive, non-NaN queue time (job.tiq) are used.
    Single-core jobs are plotted as log10 queue time versus log10 requested
    walltime in 'QueueTimes.png'. Jobs with more than one core are plotted
    as log10 queue time versus log10 requested core time in 'CHvsQT.png'.
    The maximum and mean (with standard error) of the single-core log queue
    times are printed first.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    joblist = sys.argv[1:]

    jobs = jobstats.alljobs(joblist)
    qts = []
    wallts = []
    corehrs = []
    allqts = []
    for job in jobs:
        # "not tiq > 0" also rejects NaN, matching the explicit isnan test
        if not job.tiq > 0 or np.isnan(job.tiq):
            continue
        if job.cores > 1:
            allqts.append(np.log10(job.tiq/3600.0))
            corehrs.append(np.log10(job.cores*job.walltimereq/3600.0))
        elif job.cores == 1:
            qts.append(np.log10(job.tiq/3600.0))
            wallts.append(np.log10(job.walltimereq/3600.0))

    # max() and the statistics are undefined for an empty list
    if qts:
        print('max qt:' + str(max(qts)))
        print('avg qt:' + str(np.mean(qts)) + '+/-'
              + str(np.std(qts)/np.sqrt(len(qts))))

    _plot_queue_time_trend(wallts, qts, 'log(Walltime (hours))',
                           'QueueTimes.png')
    _plot_queue_time_trend(corehrs, allqts, 'log(Core Hours)', 'CHvsQT.png')
def main():
    """
    Main routine: Histogram job timestamps against known crash times

    Jobs created after the hard-coded reset time that have a positive start
    time are selected. Their created, eligible, queued, started and
    finished timestamps are drawn as five stacked histograms sharing the x
    axis, using 900-second bins, with a vertical line at every hard-coded
    crash time. The figure is saved as 'ctimes.png'. Exits with a message
    when no job is selected.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: " + sys.argv[0] + " [Accounting files]")
    joblist = sys.argv[1:]

    # Plain decimal literals: a leading zero (05) is a syntax error in
    # Python 3 and denotes the same value in Python 2.
    reset_time = dt.datetime(2013, 5, 10, 19, 30)
    reset_time_unix = time.mktime(reset_time.timetuple())
    crash_times = [
        dt.datetime(2013, 5, 11, 0, 50),
        dt.datetime(2013, 5, 11, 7, 0),
        dt.datetime(2013, 5, 11, 7, 25),
        dt.datetime(2013, 5, 11, 8, 18),
        dt.datetime(2013, 5, 11, 8, 50),
        dt.datetime(2013, 5, 11, 10, 55),
        dt.datetime(2013, 5, 13, 2, 43),
        dt.datetime(2013, 5, 13, 6, 46),
        dt.datetime(2013, 5, 13, 7, 14),
        dt.datetime(2013, 5, 13, 11, 55),
    ]
    crash_times_unix = [time.mktime(ct.timetuple()) for ct in crash_times]

    selected = [job for job in jobstats.alljobs(joblist)
                if job.ctime > reset_time_unix and job.start > 0]
    if not selected:
        sys.exit("No started jobs created after " + str(reset_time))

    series = [
        ("Created", [job.ctime for job in selected]),
        ("Eligible", [job.etime for job in selected]),
        ("Queued", [job.qtime for job in selected]),
        ("Started", [job.start for job in selected]),
        ("Finished", [job.end for job in selected]),
    ]

    bin_seconds = 900
    plt.cla()
    f, axes = plt.subplots(5, sharex=True, sharey=False, figsize=(30, 15))
    for ax, (label, stamps) in zip(axes, series):
        # hist needs a positive integer bin count; true division would give
        # a float and a span shorter than one bin would give zero
        nbins = max(1, int((max(stamps) - min(stamps)) // bin_seconds))
        ax.hist(stamps, bins=nbins, color="k", label=label)
        # Read the y limit after hist so the lines span the histogram
        ax.vlines(crash_times_unix, 0, ax.get_ylim()[1], "b")
        ax.legend(loc=2, borderaxespad=0.0)

    plt.xlabel("Unix Epoch Time")
    f.subplots_adjust(hspace=0)
    plt.savefig("ctimes.png")
def main():
    """
    Main routine: Print the mean queue time and its standard error

    One line is printed, made of a label derived from the argument list,
    the mean of job.tiq over all jobs and std/sqrt(n). Nothing is printed
    when there are no jobs or the mean is not greater than zero.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    joblist = sys.argv[1:]

    waits = [job.tiq for job in jobstats.alljobs(joblist)]
    if not waits:
        return

    mean_wait = np.mean(waits)
    # "not ... > 0.0" (rather than "<= 0.0") keeps a NaN mean silent too
    if not mean_wait > 0.0:
        return

    stderr = np.std(waits)/np.sqrt(len(waits))
    # strip() removes any of the characters [ ] ' / t m p from both ends of
    # the list's string form; it is not a prefix removal
    label = str(joblist).strip("[]'/tmp")
    print(label + ' ' + str(mean_wait) + ' ' + str(stderr))
def main():
    """
    Main routine: Count how many jobs used a given queue

    argv[1] is the queue name and the remaining arguments are accounting
    files. A job counts when the given name occurs in job.queue (substring
    match). The count is printed together with the total number of jobs.
    """
    if len(sys.argv) < 3:
        # The first argument is a queue name, not a user name
        sys.exit("Usage: " + sys.argv[0] + " queue [Accounting files]")
    queue = sys.argv[1]
    joblist = sys.argv[2:]

    jobs = jobstats.alljobs(joblist)
    print("searching queue " + queue)
    qnum = sum(1 for job in jobs if queue in job.queue)
    print("Queue " + queue + " used in " + str(qnum) + " of "
          + str(len(jobs)) + " jobs")
    return
def main():
    """
    Main routine: Scans through jobs and identifies jobs that were running
    at the specified time

    Positional arguments are the time followed by one or more accounting
    files. Options:
      -n/--node     restrict the search to the given node(s); repeatable
      -d/--date     date passed to gettime together with the time
      -s/--summary  print full stats for each job instead of its id
    """
    # Parse command-line options
    # NOTE(review): "HH:MM:DD" presumably means HH:MM:SS - confirm against
    # gettime before changing the text
    usage = "usage: %prog [options] [time (HH:MM:DD or Epoch time)] [Accounting files]"
    parser = OptionParser(usage=usage)
    parser.add_option("-n", "--node", dest="node", action="append",
                      type="string", help="Search only for specified node(s)")
    parser.add_option("-d", "--date", dest="date", action="store",
                      type="string",
                      help="Specify a date in YYYY-MM-DD format (default: today)")
    parser.add_option("-s", "--summary", dest="summarize", action="store_true",
                      default=False,
                      help="Print a summary of each job (default: jobID only)")
    (options, args) = parser.parse_args()

    if len(args) < 2:
        # get_usage() expands %prog; the raw usage string would print it
        # literally
        sys.exit(parser.get_usage() + ' --help for list of options')
    when = gettime(args[0], options.date)
    joblist = args[1:]

    # Filters prevent storing undesired jobs in the jobs list; options.node
    # is None when -n was not given
    jobs = jobstats.alljobs(joblist, options.node)
    for job in jobs:
        if not job.wasRunningAt(when):
            continue
        # Skip jobs on nodes that weren't specified
        if options.node is not None and \
                not any(n in job.nodes for n in options.node):
            continue
        if options.summarize:
            job.printStats()
        else:
            print(job.id)
    return
def main():
    """
    Main routine: Cross reference a list of nodes to find common jobs

    Prints the stats of every job whose node list contains all of the
    hard-coded nodes below. All command-line arguments are accounting
    files.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    # Start at argv[1]: starting at argv[2] dropped the first accounting
    # file even though the usage and the length check expect files from
    # argv[1] onward
    joblist = sys.argv[1:]

    jobs = jobstats.alljobs(joblist)
    nodelist = ['sw-2r13-n50', 'sw-2r13-n51', 'sw-2r14-n20',
                'sw-2r15-n17', 'sw-2r15-n44', 'sw-2r15-n66']
    print('cross referencing nodes:')
    print(nodelist)
    for job in jobs:
        if all(node in job.nodes for node in nodelist):
            job.printStats()
    return
def main():
    """
    Main routine: Process total core days used by all jobs

    Adds up cores*walltime over every job, divides the total by
    24.0*3600.0 and prints the result with one decimal place.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')

    total = 0.0
    for job in jobstats.alljobs(sys.argv[1:]):
        total += job.cores*job.walltime

    # presumably walltime is in seconds, making this core days - confirm
    total /= 24.0*3600.0
    print('%.1f' % total)
    return
def main():
    """
    Main routine: Plot core hours used as a function of job core count

    For every job using fewer than 1000 cores, cores*walltime/3600.0 is
    added to the bucket indexed by the job's core count. The buckets are
    drawn as a bar chart saved to 'corehoursvprocs.png'.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    joblist = sys.argv[1:]

    max_cores = 1000
    corehours = np.zeros(max_cores)
    jobs = jobstats.alljobs(joblist)
    for job in jobs:
        # The lower bound stops a negative count from wrapping around to
        # the end of the array
        if 0 <= job.cores < max_cores:
            corehours[job.cores] += job.cores*job.walltime/3600.0

    plt.cla()
    # Bucket i holds jobs with i cores, so the x positions must start at 0;
    # starting at 1 drew every bar one core too far to the right
    plt.bar(np.arange(max_cores), corehours)
    plt.xlabel('Cores')
    plt.ylabel('Core hours')
    plt.savefig('corehoursvprocs.png')
    return
def main():
    """
    Main routine: Print User stats

    argv[1] is the user name and the remaining arguments are accounting
    files. Jobs whose job.user contains the given name (substring match)
    are collected into one user record, whose stats are then printed.
    Exits with a message when no job matches the user.
    """
    if len(sys.argv) < 3:
        sys.exit('Usage: ' + sys.argv[0] + ' username  [Accounting files]')
    username = sys.argv[1]
    joblist = sys.argv[2:]

    user = None
    for job in jobstats.alljobs(joblist):
        if username not in job.user:
            continue
        if user is None:
            # NOTE(review): sibling scripts build this record with
            # jobstats.userClass - confirm jobstats.user is still the
            # right constructor
            user = jobstats.user(job)
        else:
            user.addJob(job)

    # Without this guard printStats() fails on None when nothing matched
    if user is None:
        sys.exit('No jobs found for user ' + username)
    user.printStats()
    return
def main():
    """
    Main routine: Print the share of jobs ending with each exit code

    Counts jobs per exit code and prints one line per code, ordered by
    ascending count, holding the code and its percentage of all jobs.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    joblist = sys.argv[1:]

    exitCodes = {}
    jobs = jobstats.alljobs(joblist)
    for job in jobs:
        exitCodes[job.exitcode] = exitCodes.get(job.exitcode, 0) + 1

    # items() and print() work on Python 2 and 3; iteritems() and the print
    # statement exist only on Python 2
    sortedecs = sorted(exitCodes.items(), key=operator.itemgetter(1))
    total = len(jobs)
    for ec, quant in sortedecs:
        print(str(ec) + ' ' + str(100.0*quant/total))
    return
def main():
    """
    Main routine: Compare queue times of jobs with ppn == 0 and ppn != 0

    Only jobs with more than one core and a positive, non-NaN queue time
    are used. They are split on job.ppn == 0 and each group is stored as
    log10 queue time against log10 requested core time. A straight line is
    fitted per group to the points where both coordinates exceed -1.0 and
    its coefficients are printed. Each group's hexbin plot is saved with
    both fitted lines on top: 'procs.png' for ppn == 0 and 'ppn.png' for
    the rest.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    joblist = sys.argv[1:]
    jobs = jobstats.alljobs(joblist)

    qts_ppn, corehrs_ppn = [], []
    qts_procs, corehrs_procs = [], []
    for job in jobs:
        if not (job.cores > 1 and job.tiq > 0 and not np.isnan(job.tiq)):
            continue
        log_wait = np.log10(job.tiq/3600.0)
        log_request = np.log10(job.cores*job.walltimereq/3600.0)
        if job.ppn == 0:
            qts_procs.append(log_wait)
            corehrs_procs.append(log_request)
        else:
            qts_ppn.append(log_wait)
            corehrs_ppn.append(log_request)

    # Fit for the ppn == 0 group
    kept = [(ch, qt) for ch, qt in zip(corehrs_procs, qts_procs)
            if qt > -1.0 and ch > -1.0]
    subx_procs = [ch for ch, _ in kept]
    z_procs = np.polyfit(subx_procs, [qt for _, qt in kept], 1)
    p_procs = np.poly1d(z_procs)
    print(z_procs)

    # Fit for the ppn != 0 group
    kept = [(ch, qt) for ch, qt in zip(corehrs_ppn, qts_ppn)
            if qt > -1.0 and ch > -1.0]
    subx_ppn = [ch for ch, _ in kept]
    z_ppn = np.polyfit(subx_ppn, [qt for _, qt in kept], 1)
    p_ppn = np.poly1d(z_ppn)
    print(z_ppn)

    # ppn == 0 data, with this group's own fit drawn last
    plt.cla()
    plt.hexbin(corehrs_procs, qts_procs, bins='log')
    plt.plot(subx_ppn, p_ppn(subx_ppn), 'y', linewidth=4)
    plt.plot(subx_procs, p_procs(subx_procs), 'w', linewidth=4)
    plt.xlabel('log(Core hours requested)')
    plt.ylabel('log(Queue Time (hours))')
    plt.savefig('procs.png')

    # ppn != 0 data, with this group's own fit drawn last
    plt.cla()
    plt.hexbin(corehrs_ppn, qts_ppn, bins='log')
    plt.plot(subx_procs, p_procs(subx_procs), 'w', linewidth=4)
    plt.plot(subx_ppn, p_ppn(subx_ppn), 'y', linewidth=4)
    plt.xlabel('log(Core hours requested)')
    plt.ylabel('log(Queue Time (hours))')
    plt.savefig('ppn.png')
def main():
    """
    Main routine: Histogram job timestamps against known crash times

    Jobs created after the hard-coded reset time that have a positive start
    time are selected. The start time of each selected job is printed, and
    its id too when any of its timestamps is negative. The created,
    eligible, queued, started and finished timestamps are drawn as five
    stacked histograms sharing the x axis, using 900-second bins, with a
    vertical line at every hard-coded crash time and the legend placed
    outside the axes. The figure is saved as 'ctimes.temp.png'. Exits with
    a message when no job is selected.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] + ' [Accounting files]')
    joblist = sys.argv[1:]

    # Plain decimal literals: a leading zero (05) is a syntax error in
    # Python 3 and denotes the same value in Python 2.
    reset_time = dt.datetime(2013, 5, 10, 19, 30)
    reset_time_unix = time.mktime(reset_time.timetuple())
    crash_times = [
        dt.datetime(2013, 5, 11, 0, 50),
        dt.datetime(2013, 5, 11, 7, 0),
        dt.datetime(2013, 5, 11, 7, 25),
        dt.datetime(2013, 5, 11, 8, 18),
        dt.datetime(2013, 5, 11, 8, 50),
        dt.datetime(2013, 5, 11, 10, 55),
        dt.datetime(2013, 5, 13, 2, 43),
        dt.datetime(2013, 5, 13, 6, 46),
        dt.datetime(2013, 5, 13, 7, 14),
        dt.datetime(2013, 5, 13, 11, 31),
        dt.datetime(2013, 5, 13, 11, 41),
        dt.datetime(2013, 5, 13, 11, 55),
    ]
    crash_times_unix = [time.mktime(ct.timetuple()) for ct in crash_times]

    selected = []
    for job in jobstats.alljobs(joblist):
        if job.ctime > reset_time_unix and job.start > 0:
            print(job.start)
            if job.ctime < 0 or job.etime < 0 or job.qtime < 0 \
                    or job.start < 0 or job.end < 0:
                print(job.id)
            selected.append(job)
    if not selected:
        sys.exit('No started jobs created after ' + str(reset_time))

    series = [
        ('Created', [job.ctime for job in selected]),
        ('Eligible', [job.etime for job in selected]),
        ('Queued', [job.qtime for job in selected]),
        ('Started', [job.start for job in selected]),
        ('Finished', [job.end for job in selected]),
    ]

    bin_seconds = 900
    plt.cla()
    f, axes = plt.subplots(5, sharex=True, sharey=False, figsize=(20, 10))
    for ax, (label, stamps) in zip(axes, series):
        # hist needs a positive integer bin count; true division would give
        # a float and a span shorter than one bin would give zero
        nbins = max(1, int((max(stamps) - min(stamps)) // bin_seconds))
        ax.hist(stamps, bins=nbins, color='k', label=label)
        # Read the y limit after hist so the lines span the histogram
        ax.vlines(crash_times_unix, 0, ax.get_ylim()[1], 'b')
        ax.legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=0.)

    plt.xlabel('Unix Epoch Time')
    # right=0.8 leaves room for the legends anchored outside the axes
    f.subplots_adjust(hspace=0, right=0.8)
    plt.savefig('ctimes.temp.png')