Example #1
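The snippet below assumes matplotlib.pyplot as plt and matplotlib.patches as mpatch are imported at module level, and that the helpers generate_color_set, annotate, string_to_interval_set and interval_set_to_set (interval-set utilities from the same project, presumably evalys) are already in scope.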
def plot_processor_load(jobset, ax=None, title="Load", labels=True):
    """
    Display the impact of each job on the load of each processor.

    need: execution_time, jobID, allocated_processors
    """

    # Get the current axes if none was provided
    if ax is None:
        ax = plt.gca()

    def _draw_rect(ax, base, width, height, color, label):
        rect = mpatch.Rectangle(base, width, height, alpha=0.2, color=color)
        if label:
            annotate(ax, rect, label)
        ax.add_artist(rect)

    RGB_tuples = generate_color_set(16)
    load = {p: 0.0 for p in range(*jobset.res_bounds)}

    for row in jobset.df.itertuples():
        color = RGB_tuples[row.Index % len(RGB_tuples)]
        duration = row.execution_time
        label = row.jobID if labels else None

        procset = sorted(
            interval_set_to_set(
                string_to_interval_set(
                    str(row.allocated_processors)
                )
            )
        )
        base = (procset[0], load[procset[0]])
        width = 0  # width is incremented in the first loop iteration
        for proc in procset:
            if base[0] + width != proc or load[proc] != base[1]:
                # we cannot merge across processors: draw the current
                # rectangle, and start anew
                _draw_rect(ax, base, width, duration, color, label)
                base = (proc, load[proc])
                width = 1
            else:
                # we can merge across processors: extend width, and continue
                width += 1
            load[proc] += duration

        # draw last pending rectangle if necessary
        if width > 0:
            _draw_rect(ax, base, width, duration, color, label)

    ax.set_xlim(jobset.res_bounds)
    ax.set_ylim(0, 1.02 * max(load.values()))
    ax.grid(True)
    ax.set_title(title)
    ax.set_xlabel('proc. id')
    ax.set_ylabel('load / s')
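
A minimal usage sketch, assuming the function sits next to evalys' JobSet class and that jobs.csv is a workload trace JobSet.from_csv can read (the file name and setup are assumptions, not part of the example above):

import matplotlib.pyplot as plt
from evalys.jobset import JobSet

js = JobSet.from_csv("jobs.csv")   # hypothetical workload file
fig, ax = plt.subplots()
plot_processor_load(js, ax=ax, title="Per-processor load")
plt.show()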
Example #2
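This snippet is a class constructor that turns a CSV trace of machine power-state changes into pseudo-jobs. It assumes import pandas as pd at module level, together with the interval-set helpers string_to_interval_set, interval_set_to_set, set_to_interval_set and interval_set_to_string used below.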
    def __init__(self, filename):
        """Build pseudo-jobs describing machine power-state (pstate) changes from a CSV trace."""
        df = pd.read_csv(filename)

        for col_name in ['time', 'new_pstate', 'machine_id']:
            assert(col_name in df), "Invalid input file: should contain a '{}' "\
                                    "column".format(col_name)
        assert(df['time'].count() > 0), "Invalid input file: should contain at least 1 row"

        init = df.loc[df['time'] == 0]

        # Let's initialize the pstate of each machine
        current_pstate = {}
        assert(init['time'].count() > 0), "Invalid input file: no init row (one at time = 0)"

        for index, row in init.iterrows():
            time = row['time']
            pstate = row['new_pstate']
            res_str = row['machine_id']
            res_intervals = string_to_interval_set(res_str)
            res_set = interval_set_to_set(res_intervals)

            for res in res_set:
                assert(res not in current_pstate), "Invalid input file: multiple initialization "\
                                                   "of machine {}".format(res)
                current_pstate[res] = (pstate, time)

        # Let's add a finish row
        all_machines = set(current_pstate)
        all_machines_str = interval_set_to_string(set_to_interval_set(all_machines))

        finish_df = pd.DataFrame(index=[0], columns=['time', 'machine_id', 'new_pstate'])
        finish_df.iloc[0] = [float('inf'), all_machines_str, 42]  # 42 is a dummy pstate
        df = pd.concat([df, finish_df])  # DataFrame.append was removed in pandas 2.0

        # Let's traverse the dataframe to create rectangles
        after_init = df.loc[df['time'] > 0]
        jobs = []
        after_init = after_init.sort_values(by='time')

        for index, row in after_init.iterrows():
            time = row['time']
            pstate = row['new_pstate']
            res_str = row['machine_id']
            res_intervals = string_to_interval_set(res_str)
            res_set = interval_set_to_set(res_intervals)

            # All resources of this row had their pstate changed
            # Let's group them by previous pstate and by time to create 'jobs'
            previous_pstates = {}

            for res in res_set:
                previous_pstate, previous_time = current_pstate[res]
                if (previous_pstate, previous_time) in previous_pstates:
                    previous_pstates[(previous_pstate, previous_time)].add(res)
                else:
                    previous_pstates[(previous_pstate, previous_time)] = set([res])

            # Let's create the different 'jobs'
            for (previous_pstate, previous_time) in previous_pstates:
                res_set = previous_pstates[(previous_pstate, previous_time)]
                res_str = ' '.join(str(x) for x in res_set)
                res_intervals = string_to_interval_set(res_str)

                job = [previous_time, time, previous_pstate, res_intervals]
                jobs.append(job)

                # Let's update current pstate
                for res in res_set:
                    current_pstate[res] = (pstate, time)

        # Let's create a 'jobs' dataframe
        self.pseudo_jobs = pd.DataFrame(index=range(len(jobs)),
                                        columns=['begin', 'end', 'pstate',
                                                 'interval_id'])
        # Let's traverse all the jobs:
        #   1. put each job's resource intervals in a separate dict
        #   2. reference them from the dataframe through an interval_id
        #   3. add each job row to the dataframe
        self.intervals = {}

        for i in range(len(jobs)):
            (begin, end, pstate, res_intervals) = jobs[i]
            self.intervals[i] = res_intervals
            self.pseudo_jobs.loc[i] = [begin, end, pstate, i]

        # compute resources bounds
        # (max is +1 so the visualization aligns with the job number line)
        self.res_bounds = (
            min(all_machines),
            max(all_machines) + 1)
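
Both examples rely on the same interval-set helpers to convert between the compact string form (e.g. "0-2 5") and explicit sets of machine ids. A minimal round-trip sketch, assuming an evalys-style interval_set module (the import path and the exact tuple representation are assumptions):

from evalys.interval_set import (string_to_interval_set, interval_set_to_set,
                                 set_to_interval_set, interval_set_to_string)

itvs = string_to_interval_set("0-2 5")          # e.g. [(0, 2), (5, 5)]
machines = interval_set_to_set(itvs)            # {0, 1, 2, 5}
back = interval_set_to_string(set_to_interval_set(machines))  # "0-2 5"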