def plot_processor_load(jobset, ax=None, title="Load", labels=True):
    """
    Display the impact of each job on the load of each processor.

    need: execution_time, jobID, allocated_processors

    For every row of ``jobset.df`` the job's ``execution_time`` is stacked
    on top of the load already accumulated on each of its allocated
    processors. Adjacent processors that share the same current load are
    merged into a single rectangle.

    :param jobset: object exposing ``df`` (iterated with ``itertuples``)
        and ``res_bounds`` (used both as ``range(*res_bounds)`` and as the
        x limits of the plot).
    :param ax: axes to draw on; the current pyplot axes when ``None``.
    :param title: title set on the axes.
    :param labels: when true, each rectangle is annotated with the job's
        ``jobID``.
    """
    # Get current axe to plot
    if ax is None:
        ax = plt.gca()

    def _draw_rect(ax, base, width, height, color, label):
        # Use the explicit `height` argument rather than a variable captured
        # from the enclosing scope.
        rect = mpatch.Rectangle(base, width, height, alpha=0.2, color=color)
        # Compare against None so that a falsy job ID (e.g. 0) is still
        # annotated when labels are requested.
        if label is not None:
            annotate(ax, rect, label)
        ax.add_artist(rect)

    RGB_tuples = generate_color_set(16)
    # Accumulated load of every processor, keyed by processor id.
    load = {p: 0.0 for p in range(*jobset.res_bounds)}

    for row in jobset.df.itertuples():
        color = RGB_tuples[row.Index % len(RGB_tuples)]
        duration = row.execution_time
        label = row.jobID if labels else None

        procset = sorted(
            interval_set_to_set(
                string_to_interval_set(
                    str(row.allocated_processors)
                )
            )
        )
        if not procset:
            # Nothing allocated: there is no rectangle to draw for this job.
            continue

        base = (procset[0], load[procset[0]])
        width = 0  # width is incremented in the first loop iteration

        for proc in procset:
            if base[0] + width != proc or load[proc] != base[1]:
                # we cannot merge across processors: draw the current
                # rectangle, and start anew
                _draw_rect(ax, base, width, duration, color, label)
                base = (proc, load[proc])
                width = 1
            else:
                # we can merge across processors: extend width, and continue
                width += 1
            load[proc] += duration

        # draw last pending rectangle if necessary
        if width > 0:
            _draw_rect(ax, base, width, duration, color, label)

    ax.set_xlim(jobset.res_bounds)
    # Leave a 2% margin above the most loaded processor.
    ax.set_ylim(0, 1.02 * max(load.values()))
    ax.grid(True)
    ax.set_title(title)
    ax.set_xlabel('proc. id')
    ax.set_ylabel('load / s')
def __init__(self, filename):
    """
    Build pseudo jobs from a CSV trace of machine pstate changes.

    The file must contain the columns ``time``, ``new_pstate`` and
    ``machine_id``, and at least one row at ``time == 0`` giving the
    initial pstate of the machines. Every period during which a group of
    machines stayed in the same pstate becomes one pseudo job.

    Attributes set on the instance:

    * ``pseudo_jobs``: DataFrame with columns ``begin``, ``end``,
      ``pstate`` and ``interval_id``.
    * ``intervals``: dict mapping each ``interval_id`` to the interval set
      of the machines of that pseudo job.
    * ``res_bounds``: ``(min machine id, max machine id + 1)``.

    :param filename: path of the CSV file, passed to ``pd.read_csv``.
    :raises AssertionError: if a required column is missing, the file has
        no row, there is no row at time 0, or a machine is initialized
        more than once.
    """
    df = pd.read_csv(filename)
    for col_name in ['time', 'new_pstate', 'machine_id']:
        assert col_name in df, (
            "Invalid input file: should contain a '{}' "
            "column".format(col_name))
    assert df['time'].count() > 0, \
        "Invalid input file: should contain at least 1 row"

    init = df.loc[df['time'] == 0]
    assert init['time'].count() > 0, \
        "Invalid input file: no init row (one at time = 0)"

    # Let's initialize the pstate of each machine:
    # machine id -> (pstate, time at which that pstate was entered)
    current_pstate = {}
    for _, row in init.iterrows():
        time = row['time']
        pstate = row['new_pstate']
        machines = interval_set_to_set(
            string_to_interval_set(row['machine_id']))
        for res in machines:
            # The placeholder is named, so the value must be passed by
            # keyword for the message to be built when the check fails.
            assert res not in current_pstate, (
                "Invalid input file: multiple initialization of "
                "machine {res}".format(res=res))
            current_pstate[res] = (pstate, time)

    # Let's add a finish row at time = +inf covering every machine, so the
    # last pstate of each machine is closed into a pseudo job. Its own
    # pstate value is a placeholder: jobs only record the previous pstate.
    all_machines = set(current_pstate)
    all_machines_str = interval_set_to_string(
        set_to_interval_set(all_machines))
    finish_df = pd.DataFrame(index=[0],
                             columns=['time', 'machine_id', 'new_pstate'])
    finish_df.iloc[0] = [float('inf'), all_machines_str, 42]
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat([df, finish_df])

    # Let's traverse the dataframe to create rectangles
    after_init = df.loc[df['time'] > 0].sort_values(by='time')
    jobs = []

    for _, row in after_init.iterrows():
        time = row['time']
        pstate = row['new_pstate']
        changed = interval_set_to_set(
            string_to_interval_set(row['machine_id']))

        # All resources of this row had their pstate changed.
        # Let's group them by previous pstate and by time to create 'jobs'
        previous_pstates = {}
        for res in changed:
            previous_pstates.setdefault(current_pstate[res], set()).add(res)

        # Let's create the different 'jobs'
        for (previous_pstate, previous_time), group in \
                previous_pstates.items():
            group_str = ' '.join(str(x) for x in group)
            group_intervals = string_to_interval_set(group_str)
            jobs.append(
                [previous_time, time, previous_pstate, group_intervals])

            # Let's update current pstate. A dedicated name is used for the
            # group so that every machine of the row ends up updated.
            for res in group:
                current_pstate[res] = (pstate, time)

    # Let's create a 'jobs' dataframe
    self.pseudo_jobs = pd.DataFrame(
        index=range(len(jobs)),
        columns=['begin', 'end', 'pstate', 'interval_id'])

    # Let's traverse all the jobs:
    # 1. Let's put resource intervals in another field
    # 2. Let's use a resource_id instead
    # 3. Let's add it into the dataframe
    self.intervals = {}
    for i, (begin, end, job_pstate, job_intervals) in enumerate(jobs):
        self.intervals[i] = job_intervals
        self.pseudo_jobs.loc[i] = [begin, end, job_pstate, i]

    # compute resources bounds
    # (+1 for max because of visu alignment over the job number line)
    self.res_bounds = (min(all_machines), max(all_machines) + 1)