Ejemplo n.º 1
0
def compute_overall_utilisation():
    '''
    Compute the normalized mean utilisation of every SWF file found in
    the "./swf_files/" directory.

    For each "*.swf" file, the workload is loaded, the mean of its
    utilisation is computed with `load_mean` and divided by the workload's
    `MaxProcs`. Files for which this raises an AttributeError are reported
    on stdout and skipped.

    The per-file values, their mean and their `describe()` summary are
    printed, and the per-file values are written to
    "overall_utilisation.csv".

    :returns:
        a pandas Series indexed by file path holding the normalized mean
        utilisation of each successfully processed file.
    '''
    # Collected in a dict first: growing a Series element by element relied
    # on Series.set_value, which no longer exists in recent pandas.
    results = {}
    for f in glob('./swf_files/*.swf'):
        print('Loading SWF file: {}'.format(f))
        # Bind the name before the try so that the cleanup below never
        # touches an unbound variable when loading itself fails.
        wl = None
        try:
            wl = workload.Workload.from_csv(f)
            norm_util_mean = load_mean(wl.utilisation) / wl.MaxProcs
            print('Mean Util: {}\n'.format(norm_util_mean))
            results[f] = norm_util_mean
        except AttributeError as e:
            print("Unable to compute normalize mean: {}".format(e))
        finally:
            # Drop the reference before the next file is loaded.
            wl = None

    res = pd.Series(results, dtype=float)
    print('{}'.format(res))
    print('{}'.format(res.mean()))
    print(res.describe())
    res.to_csv("overall_utilisation.csv")
    return res
Ejemplo n.º 2
0
def plot_load(load, nb_resources=None, ax=None, normalize=False,
              time_scale=False, load_label="load",
              UnixStartTime=0, TimeZoneString='UTC'):
    '''
    Plots the number of used resources against time.

    :load: table with a `load` column; when `time_scale` is True its index
        must be named `time` (it is read back as the `time` column after
        `reset_index`).
    :nb_resources: number of available resources. When given and
        `normalize` is False, a horizontal "Maximum resources" line is
        drawn. When `normalize` is True and it is None, the maximum of the
        load is used instead.
    :ax: matplotlib axes to draw on; defaults to `plt.gca()`.
    :normalize: if True normalize the load and its mean by the number of
        resources `nb_resources`.
    :time_scale: if True the time values (in seconds) are shifted by
        `UnixStartTime`, converted to datetimes and expressed in the
        `TimeZoneString` time zone.
    :load_label: label used in the legend entries.
    :UnixStartTime: offset in seconds added to the time values.
    :TimeZoneString: target time zone of the time axis.
    '''
    mean = metrics.load_mean(load)
    u = load.copy()

    if time_scale:
        # make the time index a column
        u = u.reset_index()
        # convert timestamp to datetime
        u.index = pd.to_datetime(u['time'] + UnixStartTime,
                                 unit='s')
        # tz_localize/tz_convert return a new index: it has to be assigned
        # back, otherwise the requested time zone is silently ignored.
        u.index = u.index.tz_localize('UTC').tz_convert(TimeZoneString)

    if normalize and nb_resources is None:
        nb_resources = u['load'].max()

    if normalize:
        u['load'] = u['load'] / nb_resources
        mean = mean / nb_resources

    # get an axe if not provided
    if ax is None:
        ax = plt.gca()

    # leave room to have better view
    ax.margins(x=0.1, y=0.1)

    # plot load
    u['load'].plot(drawstyle="steps-post", ax=ax, label=load_label)

    first_time, last_time = u.index[0], u.index[-1]

    # plot a line for max available area
    if nb_resources and not normalize:
        ax.plot([first_time, last_time],
                [nb_resources, nb_resources],
                linestyle='-', linewidth=2,
                label="Maximum resources ({})".format(nb_resources))

    # plot a line for mean utilisation
    ax.plot([first_time, last_time],
            [mean, mean],
            linestyle='--', linewidth=1,
            label="Mean {0} ({1:.2f})".format(load_label, mean))

    # mark every instant where the load drops to zero
    sns.rugplot(u['load'][u['load'] == 0].index, ax=ax, color='r')
    # NOTE(review): the empty scatter presumably acts as a legend proxy for
    # the rug marks, see https://github.com/mwaskom/seaborn/issues/1071
    ax.scatter([], [], marker="|", linewidth=1, s=200,
               label="Reset event ({} == 0)".format(load_label), color='r')

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
Ejemplo n.º 3
0
    def extract_periods_with_given_utilisation(self,
                                               period_in_hours,
                                               utilisation,
                                               variation=0.01,
                                               nb_max=None):
        '''
        Extract from the workload the periods (of `period_in_hours` hours)
        whose mean utilisation, normalized by `MaxProcs`, lies within
        `utilisation` +/- `variation` (bounds included).

        :period_in_hours: length of each candidate period, in hours.
        :utilisation: target normalized mean utilisation (between 0 and 1).
        :variation: accepted absolute deviation around `utilisation`.
        :nb_max: if set (and non-zero), keep only the first `nb_max`
            matching periods.

        :returns:
            the result of `self.extract` applied to the selected periods,
            i.e. the workloads of the given periods, with the given
            utilisation, extracted from this workload.
        '''
        norm_util = self.utilisation

        # cut the utilisation time range into consecutive periods
        time_periods = np.arange(min(norm_util.index),
                                 max(norm_util.index),
                                 60 * 60 * period_in_hours)

        # The first boundary is skipped, as is the last one (which has no
        # following boundary to close the period): periods are therefore
        # the consecutive pairs taken from time_periods[1:].
        # The rows are gathered in a list and turned into a DataFrame in
        # one go: DataFrame.append was quadratic here and is gone from
        # recent pandas.
        rows = [
            {"begin": begin,
             "end": end,
             "mean_util": load_mean(norm_util, begin=begin, end=end)}
            for begin, end in zip(time_periods[1:-1], time_periods[2:])
        ]
        # explicit columns keep the frame usable when no period was built
        mean_df = pd.DataFrame(rows, columns=["begin", "end", "mean_util"])

        mean_df["norm_mean_util"] = mean_df["mean_util"] / self.MaxProcs
        in_range = ((mean_df["norm_mean_util"] >= (utilisation - variation))
                    & (mean_df["norm_mean_util"] <= (utilisation + variation)))
        periods = mean_df[in_range]

        # Only take nb_max periods if it is defined
        if nb_max:
            periods = periods.iloc[:nb_max]

        notes = ("Period of {} hours with a mean utilisation "
                 "of {}".format(period_in_hours, utilisation))
        return self.extract(periods, notes)
Ejemplo n.º 4
0
    def extract_periods_with_given_utilisation(self,
                                               period_in_hours,
                                               utilisation,
                                               variation=0.01,
                                               nb_max=None):
        '''
        Extract from the workload the periods (of `period_in_hours` hours)
        whose mean utilisation, normalized by `MaxProcs`, lies within
        `utilisation` +/- `variation` (bounds included).

        :period_in_hours: length of each candidate period, in hours.
        :utilisation: target normalized mean utilisation (between 0 and 1).
        :variation: accepted absolute deviation around `utilisation`.
        :nb_max: if set (and non-zero), keep only the first `nb_max`
            matching periods.

        :returns:
            the result of `self.extract` applied to the selected periods,
            i.e. the workloads of the given periods, with the given
            utilisation, extracted from this workload.
        '''
        norm_util = self.utilisation
        period_in_seconds = 60 * 60 * period_in_hours

        # cut the utilisation time range into consecutive periods
        time_periods = np.arange(min(norm_util.index), max(norm_util.index),
                                 period_in_seconds)

        # The first boundary is skipped, and the last one has no successor
        # to close a period, so the candidate periods are the consecutive
        # pairs of time_periods[1:].
        # Build all rows first and create the DataFrame once:
        # DataFrame.append copied the frame on every iteration and no
        # longer exists in recent pandas.
        rows = []
        for begin, end in zip(time_periods[1:-1], time_periods[2:]):
            rows.append({
                "begin": begin,
                "end": end,
                "mean_util": load_mean(norm_util, begin=begin, end=end)
            })
        # explicit columns keep the frame usable when no period was built
        mean_df = pd.DataFrame(rows, columns=["begin", "end", "mean_util"])

        mean_df["norm_mean_util"] = mean_df["mean_util"] / self.MaxProcs
        lower_bound = utilisation - variation
        upper_bound = utilisation + variation
        periods = mean_df[(mean_df["norm_mean_util"] >= lower_bound)
                          & (mean_df["norm_mean_util"] <= upper_bound)]

        # Only take nb_max periods if it is defined
        if nb_max:
            periods = periods.iloc[:nb_max]

        notes = ("Period of {} hours with a mean utilisation "
                 "of {}".format(period_in_hours, utilisation))
        return self.extract(periods, notes)
Ejemplo n.º 5
0
def plot_load(load,
              nb_resources=None,
              ax=None,
              normalize=False,
              time_scale=False,
              load_label="load",
              UnixStartTime=0,
              TimeZoneString='UTC'):
    '''
    Plots the number of used resources against time, with a grid, a legend
    and `load_label` as the axes title.

    :load: table with a `load` column; when `time_scale` is True its index
        must be named `time` (it is read back as the `time` column after
        `reset_index`).
    :nb_resources: number of available resources. When given and
        `normalize` is False, a horizontal "Maximum resources" line is
        drawn. When `normalize` is True and it is None, the maximum of the
        load is used instead.
    :ax: matplotlib axes to draw on; defaults to `plt.gca()`.
    :normalize: if True normalize the load and its mean by the number of
        resources `nb_resources`.
    :time_scale: if True the time values (in seconds) are shifted by
        `UnixStartTime`, converted to datetimes and expressed in the
        `TimeZoneString` time zone.
    :load_label: label used in the legend entries and as title.
    :UnixStartTime: offset in seconds added to the time values.
    :TimeZoneString: target time zone of the time axis.
    '''
    mean = metrics.load_mean(load)
    u = load.copy()

    if time_scale:
        # make the time index a column
        u = u.reset_index()
        # convert timestamp to datetime
        u.index = pd.to_datetime(u['time'] + UnixStartTime, unit='s')
        # tz_localize/tz_convert return a new index: it has to be assigned
        # back, otherwise the requested time zone is silently ignored.
        u.index = u.index.tz_localize('UTC').tz_convert(TimeZoneString)

    if normalize and nb_resources is None:
        nb_resources = u['load'].max()

    if normalize:
        u['load'] = u['load'] / nb_resources
        mean = mean / nb_resources

    # get an axe if not provided
    if ax is None:
        ax = plt.gca()

    # leave room to have better view
    ax.margins(x=0.1, y=0.1)

    # plot load
    u['load'].plot(drawstyle="steps-post", ax=ax, label=load_label)

    time_span = [u.index[0], u.index[-1]]

    # plot a line for max available area
    if nb_resources and not normalize:
        ax.plot(time_span, [nb_resources, nb_resources],
                linestyle='-',
                linewidth=2,
                label="Maximum resources ({})".format(nb_resources))

    # plot a line for mean utilisation
    ax.plot(time_span, [mean, mean],
            linestyle='--',
            linewidth=1,
            label="Mean {0} ({1:.2f})".format(load_label, mean))

    # mark every instant where the load drops to zero
    sns.rugplot(u['load'][u['load'] == 0].index, ax=ax, color='r')
    # NOTE(review): the empty scatter presumably acts as a legend proxy for
    # the rug marks, see https://github.com/mwaskom/seaborn/issues/1071
    ax.scatter([], [],
               marker="|",
               linewidth=1,
               s=200,
               label="Reset event ({} == 0)".format(load_label),
               color='r')

    ax.grid(True)
    ax.legend()
    ax.set_title(load_label)
Ejemplo n.º 6
0
 def mean_utilisation(self, begin_time=None, end_time=None):
     '''
     Return `load_mean` of this workload's utilisation.

     `begin_time` and `end_time` are forwarded to `load_mean` as its
     `begin` and `end` arguments (both default to None).
     '''
     utilisation = self.utilisation
     return load_mean(utilisation, begin=begin_time, end=end_time)
Ejemplo n.º 7
0
    def extract_periods_with_given_utilisation(self,
                                               period_in_hours,
                                               utilisation,
                                               variation=0.01,
                                               nb_max=None,
                                               merge_basic=False,
                                               merge_change_submit_times=False,
                                               randomize_starting_times=False,
                                               random_seed=0,
                                               max_nb_jobs=None):
        '''
        Extract from the workload the periods (of `period_in_hours` hours)
        whose mean utilisation, normalized by `MaxProcs`, lies within
        `utilisation` +/- `variation` (bounds included).

        :period_in_hours: length of each candidate period, in hours.
        :utilisation: target normalized mean utilisation (between 0 and 1).
        :variation: accepted absolute deviation around `utilisation`.
        :nb_max: if set (and non-zero), keep only the first `nb_max`
            matching periods.
        :merge_basic: forwarded as is to `self.extract`.
        :merge_change_submit_times: forwarded as is to `self.extract`.
        :randomize_starting_times: if True, candidate starting times are
            50 values drawn at random from the utilisation index (keeping
            only those that leave room for a full period) instead of
            regularly spaced ones.
        :random_seed: seed given to `np.random.seed` before the draw.
        :max_nb_jobs: forwarded as is to `self.extract`.

        :returns:
            the result of `self.extract` applied to the selected periods,
            i.e. the workloads of the given periods, with the given
            utilisation, extracted from this workload.
        '''
        norm_util = self.utilisation
        period_in_seconds = period_in_hours * (60 * 60)

        # pick the candidate starting times of the periods
        if randomize_starting_times:
            # NOTE: this reseeds numpy's global random generator.
            np.random.seed(random_seed)
            c = np.random.choice(norm_util.index, size=50)
            # keep only the starts that leave room for a full period
            time_periods = np.compress(
                c <= max(norm_util.index) - period_in_seconds, c)
        else:
            time_periods = np.arange(min(norm_util.index),
                                     max(norm_util.index),
                                     60 * 60 * period_in_hours)

        # The first and the last candidate are skipped in both modes.
        # NOTE(review): skipping them also in the randomized mode looks
        # arbitrary; kept so that results for a given seed do not change.
        # The rows are gathered in a list and turned into a DataFrame in
        # one go: DataFrame.append was quadratic here and is gone from
        # recent pandas.
        rows = []
        for begin in time_periods[1:-1]:
            end = begin + period_in_seconds
            rows.append({
                "begin": begin,
                "end": end,
                "mean_util": load_mean(norm_util, begin=begin, end=end)
            })
        # explicit columns keep the frame usable when no period was built
        mean_df = pd.DataFrame(rows, columns=["begin", "end", "mean_util"])

        mean_df["norm_mean_util"] = mean_df["mean_util"] / self.MaxProcs
        lower_bound = utilisation - variation
        upper_bound = utilisation + variation
        periods = mean_df[(mean_df["norm_mean_util"] >= lower_bound)
                          & (mean_df["norm_mean_util"] <= upper_bound)]

        # Only take nb_max periods if it is defined
        if nb_max:
            periods = periods.iloc[:nb_max]

        notes = ("Period of {} hours with a mean utilisation "
                 "of {}".format(period_in_hours, utilisation))
        return self.extract(
            periods,
            notes,
            merge_basic=merge_basic,
            merge_change_submit_times=merge_change_submit_times,
            max_nb_jobs=max_nb_jobs)
Ejemplo n.º 8
0
 def mean_utilisation(self, begin_time=None, end_time=None):
     '''
     Compute the mean of the workload utilisation with `load_mean`.

     The optional `begin_time` / `end_time` bounds are handed over to
     `load_mean` through its `begin` / `end` keyword arguments.
     '''
     bounds = {"begin": begin_time, "end": end_time}
     return load_mean(self.utilisation, **bounds)