def getUserReport_hourly(cluster, start='', stop='', top=5, account=None):
    """Return the top *top* users per TRES with their hourly usage series.

    Users are ranked by total ``alloc_secs`` taken from the daily usage table;
    an hourly ``[timestamp_ms, alloc_ratio]`` series is then built for just
    those users from the hourly usage table.

    Args:
        cluster: cluster name used to locate the CSV files under CSV_DIR.
        start, stop: time bounds forwarded to ``MyTool.getDFBetween``.
        top: number of users to report per TRES.
        account: optional account name; restricts the ranking to that account.

    Returns:
        ``(start_ts, stop_ts, tresSer)`` where ``tresSer`` maps TRES id
        (1=cpu, 2=mem, 4=node, 1001=gpu) to a list of
        ``{'data': [[ms, ratio], ...], 'name': 'user(acct)'}`` dicts.
    """
    fname = "{}/{}_{}".format(CSV_DIR, cluster, "assoc_usage_day_table.csv")
    df = pandas.read_csv(fname,
                         usecols=['id', 'id_tres', 'alloc_secs', 'time_start'],
                         dtype={'time_start': int})
    st, stp, df = MyTool.getDFBetween(df, 'time_start', start, stop)  # constrain by time
    sumDf = df.groupby(['id_tres', 'id']).sum()  # total per (tres, user)

    fname1 = "{}/{}_{}".format(CSV_DIR, cluster, "assoc_table.csv")
    userDf = pandas.read_csv(fname1,
                             usecols=['id_assoc', 'user', 'acct'],
                             index_col=0)
    sumDf = sumDf.join(userDf, on='id')
    if account:
        sumDf = sumDf[sumDf['acct'] == account]

    # Top users per TRES. A TRES absent from the filtered data yields an
    # empty list instead of raising KeyError (original raised on .loc).
    idxSer = {}
    for tres in [1, 2, 4, 1001]:  # cpu, mem, node, gpu
        if tres in sumDf.index.get_level_values('id_tres'):
            idxSer[tres] = sumDf.loc[(tres, )].nlargest(top, 'alloc_secs').index
        else:
            idxSer[tres] = []

    # refine top users' data using hour_table
    fname2 = "{}/{}_{}".format(CSV_DIR, cluster, "assoc_usage_hour_table.csv")
    df = pandas.read_csv(fname2,
                         usecols=['id', 'id_tres', 'time_start', 'alloc_secs'])
    st, stp, df = MyTool.getDFBetween(df, 'time_start', start, stop)
    dfg = df.groupby(['id_tres', 'id'])

    # {1: [{'data': [[ms,value],...], 'name': uid},...], 2:...}
    tresSer = {1: [], 2: [], 4: [], 1001: []}
    for tres, idx in idxSer.items():
        for uid in idx:
            if (tres, uid) not in dfg.groups:
                continue  # user ranked in the daily table but has no hourly rows
            # copy() so the added columns never hit a view of the grouped frame
            topDf = dfg.get_group((tres, uid)).copy()
            topDf['ts_ms'] = topDf['time_start'] * 1000
            topDf['alloc_ratio'] = topDf['alloc_secs'] / 3600
            topLst = topDf[['ts_ms', 'alloc_ratio']].values.tolist()
            tresSer[tres].append({
                'data': topLst,
                'name': userDf.loc[uid, 'user'] + "(" +
                        userDf.loc[uid, 'acct'] + ")"
            })
    return st, stp, tresSer
def sum_assoc_usage_day(cluster):
    """Aggregate one year of per-association daily usage into ranked user totals.

    Writes two CSVs under CSV_DIR: the raw year of usage joined with
    user/account info, and a per-TRES ranking of users by total alloc_secs.
    """
    # read in one year's usage table
    usage_file = "{}/{}_{}".format(CSV_DIR, cluster, "assoc_usage_day_table.csv")
    usage_df = pandas.read_csv(usage_file, dtype={'time_start': int})
    year_ago = int(time.time()) - 365 * 24 * 3600  # 1 years' history
    year_ago, _stop, usage_df = MyTool.getDFBetween(usage_df, 'time_start',
                                                    year_ago, None)

    # join with user/account information
    assoc_file = "{}/{}_{}".format(CSV_DIR, cluster, "assoc_table.csv")
    assoc_df = pandas.read_csv(assoc_file,
                               usecols=['id_assoc', 'user', 'acct'],
                               index_col=0)
    combined = usage_df.join(assoc_df, on='id')
    combined.to_csv("{}/{}_{}".format(
        CSV_DIR, cluster, "assoc_usage_day_1year_combine_table.csv"),
        index=False)

    # summary: total alloc_secs per (tres, user)
    totals = combined[['id_tres', 'user', 'alloc_secs']] \
        .groupby(['id_tres', 'user']).sum()

    ranked = []
    for tres_id in (1, 2, 4, 1001):  # cpu, mem, node, gpu
        one_tres = totals.loc[tres_id, ].sort_values('alloc_secs',
                                                     ascending=False)
        one_tres = one_tres.reset_index('user')
        one_tres['id_tres'] = tres_id
        one_tres['rank'] = one_tres.index + 1  # RangeIndex after reset_index
        ranked.append(one_tres)
    pandas.concat(ranked, ignore_index=True).to_csv(
        "{}/{}_{}".format(CSV_DIR, cluster,
                          "assoc_usage_day_1year_sum_table.csv"),
        index=False)
def sum_job_step(cluster, days=30):
    """Sum per-step CPU time for the last *days* days and join it onto jobs.

    Reads the step table, sums user/system CPU time per job, and writes a
    combined ``job_step_sum_table.csv`` with a ``total_cpu`` column (seconds).

    Args:
        cluster: cluster name used to locate the tables.
        days: how far back to look (default 30).
    """
    start = int(time.time()) - days * ONE_DAY_SECS
    step_df = SlurmDBQuery.readClusterTable(cluster, 'step_table', [
        'job_db_inx', 'id_step', 'user_sec', 'user_usec', 'sys_sec',
        'sys_usec', 'time_start'
    ])
    s1, s2, step_df = MyTool.getDFBetween(step_df, 'time_start', start)
    dfg = step_df.groupby('job_db_inx')
    sum_df = dfg.sum()
    # BUG FIX: the microsecond columns (user_usec/sys_usec) are the part that
    # must be scaled by 1e-6; the original divided the *seconds* again and
    # never used the usec columns it read.
    sum_df.insert(
        0, 'total_cpu', sum_df.user_sec + sum_df.sys_sec +
        (sum_df.user_usec + sum_df.sys_usec) / 1000000)
    sum_df = sum_df[['total_cpu']]
    #sum_df = sum_df.astype(int)   # will lost int with join because of missing data

    job_df = SlurmDBQuery.readClusterTable(cluster, 'job_table')
    s1, s2, job_df = MyTool.getDFBetween(job_df, 'time_start', start)
    # left join: job_df has more rows than sum_df (jobs without steps keep NaN)
    comb_df = job_df.join(sum_df, on='job_db_inx')
    comb_df.to_csv("{}/{}_{}".format(CSV_DIR, cluster,
                                     "job_step_sum_table.csv"),
                   index=False)
def readJobTable(cluster, start=None, stop=None, fld_lst=None, index_col=None,
                 time_col='time_submit'):
    """Read a cluster's job_table CSV, optionally bounded on *time_col*.

    Returns ``(start, stop, df)``; the incoming start/stop are passed through
    unchanged when no time filtering happens.
    """
    csv_path = "{}/{}_{}".format(CSV_DIR, cluster, "job_table.csv")
    jobs = pandas.read_csv(csv_path, usecols=fld_lst, index_col=index_col)
    # guard clause: nothing to filter on
    if not time_col or not (start or stop):
        return start, stop, jobs
    logger.debug("start={},stop={}".format(start, stop))
    return MyTool.getDFBetween(jobs, time_col, start, stop)
def getUserDoneJobReport(
        uid,
        cluster='slurm',
        days=3,
        output='JobID,JobIDRaw,JobName,AllocCPUS,AllocTRES,State,ExitCode,User,NodeList,Start,End'
):
    """Return the user's jobs from job_step_sum_table for the last *days* days.

    Returns None when more than 30 days are requested (only 30 days of
    history are saved).
    NOTE(review): *output* is accepted for interface compatibility but is not
    used by the current implementation.
    """
    if days > 30:  # only 30 days of history is saved
        return None
    since = int(time.time()) - days * ONE_DAY_SECS
    _s1, _s2, jobs = MyTool.getDFBetween(
        SlurmDBQuery.readClusterTable(cluster, 'job_step_sum_table'),
        'time_start', since)
    return jobs[jobs['id_user'] == uid]
def readClusterTableBetween(cluster, part_table_name, fld_lst, start=None,
                            stop=None, index_col=None, ts_col=None):
    """Read a cluster table; when *ts_col* is given, bound rows to [start, stop].

    Returns ``(start, stop, df)``, or ``(0, 0, df)`` when no timestamp column
    is supplied.
    """
    table_df = SlurmDBQuery.readClusterTable(cluster, part_table_name,
                                             fld_lst, index_col)
    if not ts_col:
        return 0, 0, table_df
    return MyTool.getDFBetween(table_df, ts_col, start, stop)
def getNodeRunJobs(self, node, start, stop):
    """Return jobs that ran on *node* within [start, stop].

    Result columns: id_job, user, time_start, time_end, time_suspended.
    When *node* is falsy, all node-allocating jobs in the window are returned.
    """
    wanted_cols = [
        'id_job', 'id_user', 'nodelist', 'nodes_alloc', 'state',
        'time_start', 'time_end', 'time_suspended'
    ]
    jobs = pandas.read_csv(CSV_DIR + "slurm_cluster_job_table.csv",
                           usecols=wanted_cols)
    start, stop, jobs = MyTool.getDFBetween(jobs, 'time_start', start, stop)
    jobs = jobs[jobs['nodes_alloc'] > 0]  # jobs running on node
    if node:
        on_node = jobs['nodelist'].map(lambda nl: node in MyTool.nl2flat(nl))
        jobs = jobs[on_node]
    jobs['user'] = jobs['id_user'].map(lambda u: MyTool.getUser(u))
    return jobs[['id_job', 'user', 'time_start', 'time_end',
                 'time_suspended']]
def getAccountUsage_hourly(cluster, start='', stop=''):
    """Hourly cluster usage summed per (tres, account, hour).

    Returns ``(start, stop, sumDf)`` where sumDf is indexed by
    (id_tres, acct, time_start) and carries ts_ms and alloc_ratio columns.
    """
    usage_file = "{}/{}_{}".format(CSV_DIR, cluster,
                                   "assoc_usage_hour_table.csv")
    usage = pandas.read_csv(usage_file,
                            usecols=['id', 'id_tres', 'time_start',
                                     'alloc_secs'])
    st, stp, usage = MyTool.getDFBetween(usage, 'time_start', start, stop)

    # map each association id (id_assoc/user) to its account
    assoc_file = "{}/{}_{}".format(CSV_DIR, cluster, "assoc_table.csv")
    assoc = pandas.read_csv(assoc_file, usecols=['id_assoc', 'acct'],
                            index_col=0)
    usage['acct'] = usage['id'].map(assoc['acct'])
    usage.drop('id', axis=1, inplace=True)

    # sum over identical (id_tres, acct, time_start)
    acct_sum = usage.groupby(['id_tres', 'acct', 'time_start']).sum()
    acct_sum['ts_ms'] = acct_sum.index.get_level_values('time_start') * 1000
    # 1 sec on node1 and 1 sec on node2 =? 2/3600 node
    acct_sum['alloc_ratio'] = acct_sum['alloc_secs'] / 3600
    return st, stp, acct_sum
def getClusterUsage_hourly(cluster, start, stop):
    """Hourly whole-cluster usage for the CPU and memory TRES.

    Drops rows whose per-hour second counts don't add up to count*3600, then
    splits by TRES. Returns ``(start, stop, cpuDf, memDf)``.
    read from csv, TODO: deleted=0 for all data now
    """
    usage_file = "{}/{}_{}".format(CSV_DIR, cluster, "usage_hour_table.csv")
    wanted = [
        'id_tres', 'time_start', 'count', 'alloc_secs', 'down_secs',
        'pdown_secs', 'idle_secs', 'resv_secs', 'over_secs'
    ]
    usage = pandas.read_csv(usage_file, usecols=wanted)
    start, stop, usage = MyTool.getDFBetween(usage, 'time_start', start, stop)

    parts = ['alloc_secs', 'down_secs', 'pdown_secs', 'idle_secs',
             'resv_secs']
    usage['total_secs'] = sum(usage[c] for c in parts)
    usage['tdown_secs'] = usage['down_secs'] + usage['pdown_secs']
    # keep only self-consistent rows; count =? count of cores
    usage = usage[usage['count'] * 3600 == usage['total_secs']]
    usage['ts_ms'] = usage['time_start'] * 1000

    by_tres = usage.groupby('id_tres')
    cpuDf = by_tres.get_group(1)
    memDf = by_tres.get_group(2)
    #eneDf = by_tres.get_group(3)
    #nodeDf = by_tres.get_group(4) not available
    return start, stop, cpuDf, memDf