def getSlurmDBJob(jid, req_fields=DEF_REQ_FLD):
    """Fetch job ``jid`` through ``pyslurm.slurmdb_jobs()`` and fill in fields.

    ``user_id`` is added to the record from its ``user`` entry. Every name
    in ``req_fields`` that the record lacks is then derived through
    ``PyslurmQuery.MAP_JOB2DBJ``, whose entries are either a DB field name
    or a ``[db_field, convert_func]`` pair.

    Args:
        jid: Id of the job to look up.
        req_fields: Names of the fields the caller wants present in the
            returned record.

    Returns:
        The job dict (modified in place with the extra fields), or ``None``
        when the lookup returns no record for ``jid``. Requested fields
        that cannot be derived are logged as errors and left out.
    """
    job = pyslurm.slurmdb_jobs().get(jobids=[jid]).get(jid)
    if not job:  # cannot find
        return None

    job['user_id'] = MyTool.getUid(job['user'])
    for f in req_fields:
        if f in job:
            continue
        if f in PyslurmQuery.MAP_JOB2DBJ:  # can be converted
            entry = PyslurmQuery.MAP_JOB2DBJ[f]
            # isinstance (rather than an exact type comparison) also
            # accepts list subclasses and tuple pairs.
            if isinstance(entry, (list, tuple)):
                db_fld, cvtFunc = entry
            else:
                db_fld, cvtFunc = entry, None
            # The DB record may lack the mapped field; treat that like an
            # unmappable field instead of raising KeyError.
            if db_fld in job:
                value = job[db_fld]
                job[f] = value if cvtFunc is None else cvtFunc(value)
                continue
        # cannot be found or converted
        logger.error(
            "Cannot find/map reqested job field {} in job {}".format(f, job))
    return job
def test1(user):
    """Print what ``gendata_user`` returns for ``user``.

    The time window comes from ``MyTool.getStartStopTS(days=30)`` and the
    user name is resolved to a uid with ``MyTool.getUid``.
    """
    banner = "Test user {}'s history".format(user)
    print(banner)

    ts_window = MyTool.getStartStopTS(days=30)
    begin_ts, end_ts = ts_window
    user_id = MyTool.getUid(user)

    history = gendata_user(user_id, begin_ts, end_ts)
    print(history)
def display_user_GPU(user_name):
    """Print a GPU utilization report for the running jobs of ``user_name``.

    The report has a summary line (job count, allocated GPU count, GPU node
    count), a column header, and then one row per allocated GPU with the
    average utilization since the job started and over the last 5, 10 and
    30 minutes. Jobs without a GPU allocation get a single ``No GPU`` row.

    Args:
        user_name: Login name of the user to report on.

    Returns:
        None. Everything is written to stdout. The function returns early
        after a one-line message when the user cannot be resolved to a uid
        or has no running jobs.
    """
    ts = int(time.time())
    ts_str = MyTool.getTsString(ts)

    # Resolve the uid from the ``user_name`` parameter; the previous code
    # referenced an undefined name ``user``.
    uid = MyTool.getUid(user_name)
    if not uid:
        print("{} User {} does not exist.".format(ts_str, user_name))
        return  # nothing to report for an unknown user

    user_job_lst = PyslurmQuery.getUserCurrJobs(uid)
    if not user_job_lst:
        print("{} User {} does not have running jobs.".format(
            ts_str, user_name))
        return

    node_dict = PyslurmQuery.getAllNodes()
    # job id -> GPU allocation of that job; as used below, each allocation
    # maps a node name to the list of GPU ids allocated on that node.
    job_gpu_d = {
        job['job_id']: PyslurmQuery.getJobAllocGPU(job, node_dict)
        for job in user_job_lst
    }
    gpu_allocs = list(job_gpu_d.values())

    # Several jobs may run on the same node: de-duplicate (keeping first-seen
    # order) so the node count and the node list sent to the GPU query do
    # not contain repeats.
    u_node = list(dict.fromkeys(
        node_name for alloc in gpu_allocs for node_name in alloc))
    u_gpu_cnt = sum(
        len(g_lst) for alloc in gpu_allocs for g_lst in alloc.values())
    g_union = {
        g for alloc in gpu_allocs for g_lst in alloc.values() for g in g_lst
    }
    print("{} User {} has {} running jobs,\talloc {} GPUs on {} GPU nodes.".
          format(ts_str, user_name, len(user_job_lst), u_gpu_cnt,
                 len(u_node)))

    # get gpu data
    if u_node:  # GPU nodes allocated
        # Query from the earliest start time among the jobs that hold GPUs.
        earliest_start = min(
            job['start_time'] for job in user_job_lst
            if job_gpu_d[job['job_id']])
        gpu_data = BrightRestClient().getGPU(
            u_node, earliest_start, list(g_union), msec=False)
    else:
        gpu_data = {}

    print("\t{:10}{:20}{:>16}{:>20}{:>25}".format(
        "Jid", "Job run time", "Node.GPU", "Job avg util",
        "Avg util (5,10,30min)"))
    row_fmt = "\t{:<10}{:20}{:>16}{:>20.2f}{:>10.2f},{:>6.2f},{:>6.2f}"
    for job in user_job_lst:
        jid = job['job_id']
        start_ts = job['start_time']
        j_run_time = str(datetime.timedelta(seconds=ts - start_ts))
        gpu_alloc = job_gpu_d[jid]
        if not gpu_alloc:  # job not using GPU
            print("\t{:<10}{:20}{:>16}".format(jid, j_run_time, 'No GPU'))
            continue

        # Job id and run time are shown only on the first GPU row of a job.
        jid_col, run_col = jid, j_run_time
        for node, g_lst in gpu_alloc.items():
            for g in g_lst:
                g_name = '{}.gpu{}'.format(node, g)
                g_data = gpu_data[g_name]
                # Averages since job start and over the last 5/10/30 minutes,
                # scaled by 100 for display.
                window_starts = (start_ts, ts - 5 * 60, ts - 10 * 60,
                                 ts - 30 * 60)
                avgs = [
                    MyTool.getTimeSeqAvg(g_data, begin, ts) * 100
                    for begin in window_starts
                ]
                print(row_fmt.format(jid_col, run_col, g_name, *avgs))
                jid_col, run_col = '', ''