Ejemplo n.º 1
0
def plot_cdf(resultset, desc, filename, plotdir):
    """Plot one or more CDFs described by a plot descriptor and save the figure.

    Parameters
    ----------
    resultset : ResultSet
        Result set; it is queried with ``resultset.filter(condition)`` and
        each returned entry is read with ``getval(metric_path)``.
    desc : dict
        The plot descriptor (more info below). If ``'filter'`` is missing or
        None it is set to ``{}`` in place.
    filename : str
        The name used to save the file. The file format is determined by the
        extension of the file. For example, if this filename is 'foo.pdf', the
        file will be saved in pdf format.
    plotdir : str
        The directory in which the plot will be saved.

    Raises
    ------
    ValueError
        If ycondnames and ycondvals are both given and their lengths differ
        from the length of ymetrics.
    KeyError
        If the mandatory ``'ymetrics'`` key is missing, or if a legend is
        requested but lacks an entry for one of the lines.

    Notes
    -----
    The plot descriptor is a dictionary with a set of values that describe how
    to make the plot.

    The dictionary can contain the following keys:
     * title : str, optional.
           The title of the graph
     * xlabel : str, optional
         The x label
     * ylabel : str, optional
         The y label. The default value is 'Cumulative probability'
     * filter : dict, optional
         A dictionary of values to filter in the resultset.
         Example: {'network_cache': 0.004, 'topology_name': 'GEANT'}
         If not specified or None, no filtering is executed on the results
         and possibly heterogeneous results may be plotted together
     * ymetrics : list of tuples
         List of metrics to be shown on the graph. The i-th metric of the list
         is the metric that the i-th line on the graph will represent. If
         all lines are for the same metric, then all elements of the list are
         equal.
         Each single metric (i.e. each element of the list) is a tuple modeling
         the path to identify a specific metric into an entry of a result set.
         Each metric value must be a CDF, i.e. a pair (x values, y values),
         for example ('LATENCY', 'CDF'). If several result entries match,
         only the first one is plotted.
     * ycondnames : list of tuples, optional
         List of condition names specific to each line of the graph. Different
         from the conditions expressed in the filter parameter, which are
         global, these conditions are specific to one line. Each condition
         name is a path to a condition to be checked, e.g.
         ('topology', 'name'). Values to be matched for these conditions are
         specified in ycondvals. This list must be as long as the number of
         lines to plot. If not specified, all lines are filtered by the
         conditions of filter parameter only, but in this case all ymetrics
         should be different.
     * ycondvals : list of tuples, optional
         List of values that the conditions of ycondnames must meet. This list
         must be as long as the number of lines to plot. If not specified,
         all lines are filtered by the conditions of filter parameter only,
         but in this case all ymetrics should be different.
     * xscale : str, optional
         The scale of x axis, passed to ``plt.xscale`` (e.g. 'linear', 'log').
     * yscale : str, optional
         The scale of y axis, passed to ``plt.yscale`` (e.g. 'linear', 'log').
     * step : bool, optional
         If *True* draws the CDF with steps. Default value is *True*
     * line_style : dict, optional
         Dictionary mapping each value of yvals with a line style. Lines
         without an entry are drawn with '-'.
     * legend : dict, optional
         Dictionary mapping each value of yvals with a legend label. If not
         specified, the legend is not plotted. yvals is ycondvals when all
         ymetrics are equal, the (metric, condval) pairs when they differ, and
         ymetrics when no per-line conditions are given.
     * legend_loc : str, optional
         Legend location, e.g. 'upper left'
     * legend_args : dict, optional
         Optional legend arguments, such as ncol
     * plotempty : bool, optional
         If *True*, plot and save graph even if empty. Default is *True*
    """
    fig = plt.figure()
    # try/finally guarantees the figure is released on every exit path
    # (normal completion, early return for empty plots, and exceptions).
    try:
        if 'title' in desc:
            plt.title(desc['title'])
        if 'xlabel' in desc:
            plt.xlabel(desc['xlabel'])
        plt.ylabel(desc.get('ylabel', 'Cumulative probability'))
        if 'xscale' in desc:
            plt.xscale(desc['xscale'])
        if 'yscale' in desc:
            plt.yscale(desc['yscale'])
        if 'filter' not in desc or desc['filter'] is None:
            desc['filter'] = {}
        step = desc.get('step', True)
        plot_empty = desc.get('plotempty', True)
        ymetrics = desc['ymetrics']
        ycondnames = desc.get('ycondnames')
        ycondvals = desc.get('ycondvals')
        if ycondnames is not None and ycondvals is not None:
            if not len(ymetrics) == len(ycondnames) == len(ycondvals):
                raise ValueError('ymetrics, ycondnames and ycondvals must have the same length')
            # yvals is basically the list of values that differentiate each
            # line; it is used for legends and styles mainly. zip() is
            # materialised because yvals is measured and indexed below.
            if len(set(ymetrics)) == 1:
                yvals = ycondvals
            else:
                yvals = list(zip(ymetrics, ycondvals))
        else:
            yvals = ymetrics
        line_styles = desc.get('line_style', {})
        x_min = np.inf
        x_max = -np.inf
        empty = True
        for i, yval in enumerate(yvals):
            metric = ymetrics[i]
            condition = Tree(desc['filter'])
            if ycondnames is not None:
                condition.setval(ycondnames[i], ycondvals[i])
            values = (v.getval(metric) for _, v in resultset.filter(condition))
            data = [val for val in values if val is not None]
            # If there are more than 1 CDFs in the resultset, take the first one
            if data:
                x_cdf, y_cdf = data[0]
                if step:
                    x_cdf, y_cdf = step_cdf(x_cdf, y_cdf)
            else:
                x_cdf, y_cdf = [], []
            fmt = line_styles[yval] if yval in line_styles else '-'
            # This check is to prevent crashing when trying to plot arrays of
            # nan values with axes log scale. all() is True for empty input,
            # so lines without data are also drawn as empty placeholders.
            if all(np.isnan(x) for x in x_cdf) or all(np.isnan(y) for y in y_cdf):
                plt.plot([], [], fmt)
            else:
                plt.plot(x_cdf, y_cdf, fmt)
                empty = False
                x_min = min(x_min, x_cdf[0])
                x_max = max(x_max, x_cdf[-1])
        if empty and not plot_empty:
            return
        if not empty:
            # Only meaningful when at least one CDF was drawn; otherwise the
            # limits would still be +/- infinity.
            plt.xlim(x_min, x_max)
        if 'legend' in desc:
            legend = [desc['legend'][yval] for yval in yvals]
            # Copy so that adding 'loc' does not modify the caller's dict
            legend_args = dict(desc.get('legend_args') or {})
            if 'legend_loc' in desc:
                legend_args['loc'] = desc['legend_loc']
            plt.legend(legend, prop={'size': LEGEND_SIZE}, **legend_args)
        plt.savefig(os.path.join(plotdir, filename), bbox_inches='tight')
    finally:
        plt.close(fig)
Ejemplo n.º 2
0
def plot_bar_chart(resultset, desc, filename, plotdir):
    """Plot a bar chart described by a plot descriptor and save the figure.

    Parameters
    ----------
    resultset : ResultSet
        Result set; it is queried with ``resultset.filter(condition)`` and
        each returned entry is read with ``getval(metric_path)``.
    desc : dict
        The plot descriptor (more info below). If ``'filter'`` is missing or
        None it is set to ``{}`` in place.
    filename : str
        The name used to save the file. The file format is determined by the
        extension of the file. For example, if this filename is 'foo.pdf', the
        file will be saved in pdf format.
    plotdir : str
        The directory in which the plot will be saved.

    Raises
    ------
    ValueError
        If ycondnames and ycondvals are both given and their lengths differ
        from the length of ymetrics, or if a custom placement list does not
        sum to the number of bars.
    KeyError
        If a mandatory key ('ymetrics', 'xvals', 'xparam') is missing, or if
        a legend is requested but lacks an entry for one of the bars.

    Notes
    -----
    The plot descriptor is a dictionary with a set of values that describe how
    to make the plot.

    The dictionary can contain the following keys:
     * title : str, optional.
           The title of the graph
     * xlabel : str, optional
         The x label
     * ylabel : str, optional
         The y label
     * errorbar : bool, optional
         If *True* error bars will be plotted. Default value is *True*
     * confidence : float, optional
         The confidence used to plot error bars. Default value is 0.95
     * filter : tree or dict of dicts, optional
         A tree or nested dictionary of values to include from the resultset.
         Example: {'cache_placement': {'network_cache': 0.004},
         'topology': {'name': 'GEANT'}}.
         If not specified or None, no filtering is executed on the results
         and possibly heterogeneous results may be plotted together.
     * xparam : tuple
         The path of the x axis metric, e.g. ('workload', 'alpha')
     * xvals : list
         Range of x values, e.g. [0.6, 0.7, 0.8, 0.9]
     * xticks : list, optional
         Names to display as ticks. If not specified, xvals is used instead
     * ymetrics : list of tuples
         List of metrics to be shown on the graph. The i-th metric of the list
         is the metric that the i-th bar on the graph will represent. If
         all bars are for the same metric, then all elements of the list are
         equal.
         Each single metric (i.e. each element of the list) is a tuple modeling
         the path to identify a specific metric into an entry of a result set.
         Normally, it is a 2-value list where the first value is the name of
         the collector which measured the metric and the second value is the
         metric name. Example values could be ('CACHE_HIT_RATIO', 'MEAN'),
         ('LINK_LOAD', 'MEAN_INTERNAL') or ('LATENCY', 'MEAN').
         For example, if in a graph of 5 bars all bars of the graph show mean
         latency, then ymetrics = [('LATENCY', 'MEAN')]*5.
     * ycondnames : list of tuples, optional
         List of condition names specific to each bar of the graph. Different
         from the conditions expressed in the filter parameter, which are
         global, these conditions are specific to one bar. Each condition
         name is a path to a condition to be checked, e.g.
         ('topology', 'name'). Values to be matched for these conditions are
         specified in ycondvals. This list must be as long as the number of
         bars to plot. If not specified, all bars are filtered by the
         conditions of filter parameter only, but in this case all ymetrics
         should be different.
     * ycondvals : list of tuples, optional
         List of values that the conditions of ycondnames must meet. This list
         must be as long as the number of bars to plot. If not specified,
         all bars are filtered by the conditions of filter parameter only,
         but in this case all ymetrics should be different.
     * placement : (grouped | stacked | [x, y, ...]), optional
         Defines how to place bars in the plot. If 'grouped' (the default),
         all bars for a specific xval are placed next to each other; if
         'stacked', they are plotted on top of each other. It is also
         possible to specify a custom grouped+stacked placement with a list
         of integers, in which the number of items is the number of columns
         and the value of each item is the number of metrics stacked on that
         column. For example [4,2,3] means plotting 4 + 2 + 3 = 9 metrics:
         4 stacked in the first column, 2 stacked on the second and 3 stacked
         on the third.
     * group_width : float, default: 0.4
         Width of a group of bars
     * bar_color : dict, optional
         Dictionary mapping each value of yvals with a bar color. If it is
         missing or does not cover every bar, colors are taken in order from
         BW_COLOR_CATALOGUE when it has enough entries, otherwise None is
         passed as color.
     * bar_hatch : dict, optional
         Dictionary mapping each value of yvals with a bar hatch. If set to
         None all bars will be plotted without hatch. If it is missing or
         does not cover every bar, hatches are taken in order from
         HATCH_CATALOGUE when it has enough entries, otherwise no hatch is
         used.
     * legend : dict, optional
         Dictionary mapping each value of yvals with a legend label. If not
         specified, the legend is not plotted. yvals is ycondvals when all
         ymetrics are equal, the (metric, condval) pairs when they differ, and
         ymetrics when no per-bar conditions are given.
     * legend_loc : str, optional
         Legend location, e.g. 'upper left'
     * legend_args : dict, optional
         Optional legend arguments, such as ncol
     * plotempty : bool, optional
         If *True*, plot and save graph even if empty. Default is *True*
     * ymax: float, optional
        The upper limit of the y axis. If not specified, it is automatically
        selected by Matplotlib
    """
    fig = plt.figure()
    # try/finally guarantees the figure is released on every exit path
    # (normal completion, early return for empty plots, and exceptions).
    try:
        if 'title' in desc:
            plt.title(desc['title'])
        plt.subplot(111)
        # Visibility flag passed positionally: its keyword name differs
        # between Matplotlib releases ('b' in older ones, 'visible' later).
        plt.grid(True, which='major', color='k', axis='y', linestyle='--')
        if 'xlabel' in desc:
            plt.xlabel(desc['xlabel'])
        if 'ylabel' in desc:
            plt.ylabel(desc['ylabel'])
        if 'filter' not in desc or desc['filter'] is None:
            desc['filter'] = {}
        plot_empty = desc.get('plotempty', True)
        confidence = desc.get('confidence', 0.95)
        show_errorbar = desc.get('errorbar', True)

        ymetrics = desc['ymetrics']
        ycondnames = desc.get('ycondnames')
        ycondvals = desc.get('ycondvals')
        if ycondnames is not None and ycondvals is not None:
            if not len(ymetrics) == len(ycondnames) == len(ycondvals):
                raise ValueError('ymetrics, ycondnames and ycondvals must have the same length')
            # yvals is basically the list of values that differentiate each
            # bar; it is used for legends and styles mainly. zip() is
            # materialised because yvals is measured and indexed below.
            if len(set(ymetrics)) == 1:
                yvals = ycondvals
            else:
                yvals = list(zip(ymetrics, ycondvals))
        else:
            yvals = ymetrics
        placement = desc.get('placement', 'grouped')
        if placement == 'grouped':
            placement = [1 for _ in range(len(yvals))]
        elif placement == 'stacked':
            placement = [len(yvals)]
        elif sum(placement) != len(yvals):
            raise ValueError('Placement definition incorrect. '
                             'The sum of values of the list must be equal to '
                             'the number of y values')
        xticks = desc['xticks'] if 'xticks' in desc else desc['xvals']
        empty = True
        # Spacing attributes
        # width of a group of bars
        group_width = desc.get('group_width', 0.4)
        width = group_width / len(placement)        # width of a single bar
        separation = width / 2                      # space between adjacent groups
        border = 0.6 * separation                   # left and right borders

        elem = collections.defaultdict(int)         # bar objects (for legend)
        # Select colors and hatches
        if 'bar_color' in desc and all(y in desc['bar_color'] for y in yvals):
            color = desc['bar_color']
        elif len(yvals) <= len(BW_COLOR_CATALOGUE):
            color = dict((y, BW_COLOR_CATALOGUE[yvals.index(y)]) for y in yvals)
        else:
            color = collections.defaultdict(lambda: None)
        if 'bar_hatch' in desc and desc['bar_hatch'] is None:
            hatch = collections.defaultdict(lambda: None)
        elif 'bar_hatch' in desc and all(y in desc['bar_hatch'] for y in yvals):
            hatch = desc['bar_hatch']
        elif len(yvals) <= len(HATCH_CATALOGUE):
            # Bounded by the catalogue that is actually indexed here
            hatch = dict((y, HATCH_CATALOGUE[yvals.index(y)]) for y in yvals)
        else:
            hatch = collections.defaultdict(lambda: None)
        # Plot bars
        left = border    # left-most point of the bar about to draw
        for xval in desc['xvals']:
            bar_idx = 0  # index of the bar (metric/condition) within a group
            for stack_size in placement:
                bottom = 0   # Bottom point of a bar; restarts at 0 per column
                for _ in range(stack_size):
                    condition = Tree(desc['filter'])
                    condition.setval(desc['xparam'], xval)
                    if ycondnames is not None:
                        condition.setval(ycondnames[bar_idx], ycondvals[bar_idx])
                    # The metric is selected per bar, not per x value
                    metric = ymetrics[bar_idx]
                    values = (v.getval(metric)
                              for _, v in resultset.filter(condition))
                    data = [val for val in values if val is not None]
                    meanval, err = means_confidence_interval(data, confidence)
                    yerr = err if show_errorbar else None
                    if not np.isnan(meanval):
                        empty = False
                    yval = yvals[bar_idx]
                    elem[yval] = plt.bar(left, meanval, width,
                                         color=color[yval],
                                         yerr=yerr, bottom=bottom, ecolor='k',
                                         hatch=hatch[yval], label=yval)
                    bottom += meanval
                    bar_idx += 1
                left += width
            left += separation
        if empty and not plot_empty:
            return
        n_bars = len(placement)
        plt.xticks(border + 0.5*(n_bars*width) +
                   (separation + n_bars*width)*np.arange(len(xticks)),
                   xticks)
        if 'legend' in desc:
            legend = [desc['legend'][y] for y in yvals]
            # Copy so that adding 'loc' does not modify the caller's dict
            legend_args = dict(desc.get('legend_args') or {})
            if 'legend_loc' in desc:
                legend_args['loc'] = desc['legend_loc']
            plt.legend([elem[y] for y in yvals], legend,
                       prop={'size': LEGEND_SIZE},
                       **legend_args)
        xmin, _ = plt.xlim()
        plt.xlim(xmin, left - separation + border)
        if 'ymax' in desc:
            plt.ylim(ymax=desc['ymax'])
        plt.savefig(os.path.join(plotdir, filename), bbox_inches='tight')
    finally:
        plt.close(fig)
Ejemplo n.º 3
0
def plot_lines(resultset, desc, filename, plotdir):
    """Plot a line graph described by a plot descriptor and save the figure.

    Each line shows, for every x value, the mean of a metric over the matching
    result entries, optionally with confidence-interval error bars.

    Parameters
    ----------
    resultset : ResultSet
        Result set; it is queried with ``resultset.filter(condition)`` and
        each returned entry is read with ``getval(metric_path)``.
    desc : dict
        The plot descriptor (more info below). If ``'filter'`` is missing or
        None it is set to ``{}`` in place.
    filename : str
        The name used to save the file. The file format is determined by the
        extension of the file. For example, if this filename is 'foo.pdf', the
        file will be saved in pdf format.
    plotdir : str
        The directory in which the plot will be saved.

    Raises
    ------
    ValueError
        If ycondnames and ycondvals are both given and their lengths differ
        from the length of ymetrics.
    KeyError
        If a mandatory key ('ymetrics', 'xvals', 'xparam') is missing, or if
        a legend is requested but lacks an entry for one of the lines.

    Notes
    -----
    The plot descriptor is a dictionary with a set of values that describe how
    to make the plot.

    The dictionary can contain the following keys:
     * title : str, optional.
           The title of the graph
     * xlabel : str, optional
         The x label
     * ylabel : str, optional
         The y label
     * errorbar : bool, optional
         If *True* error bars will be plotted. Default value is *True*.
         Error bars are also omitted for a line whose errors are all zero.
     * confidence : float, optional
         The confidence used to plot error bars. Default value is 0.95
     * xparam : iterable
         Path to the value of the x axis metric, e.g. ['workload', 'alpha']
     * xvals : list
         Range of x values, e.g. [0.6, 0.7, 0.8, 0.9]. The values are sorted
         before plotting.
     * filter : dict, optional
         A dictionary of values to filter in the resultset.
         Example: {'network_cache': 0.004, 'topology_name': 'GEANT'}
         If not specified or None, no filtering is executed on the results
         and possibly heterogeneous results may be plotted together
     * ymetrics : list of tuples
         List of metrics to be shown on the graph. The i-th metric of the list
         is the metric that the i-th line on the graph will represent. If
         all lines are for the same metric, then all elements of the list are
         equal.
         Each single metric (i.e. each element of the list) is a tuple modeling
         the path to identify a specific metric into an entry of a result set.
         Normally, it is a 2-value list where the first value is the name of
         the collector which measured the metric and the second value is the
         metric name. Example values could be ('CACHE_HIT_RATIO', 'MEAN'),
         ('LINK_LOAD', 'MEAN_INTERNAL') or ('LATENCY', 'MEAN').
         For example, if in a graph of 5 lines all lines of the graph show
         mean latency, then ymetrics = [('LATENCY', 'MEAN')]*5.
     * ycondnames : list of tuples, optional
         List of condition names specific to each line of the graph. Different
         from the conditions expressed in the filter parameter, which are
         global, these conditions are specific to one line. Each condition
         name is a path to a condition to be checked, e.g.
         ('topology', 'name'). Values to be matched for these conditions are
         specified in ycondvals. This list must be as long as the number of
         lines to plot. If not specified, all lines are filtered by the
         conditions of filter parameter only, but in this case all ymetrics
         should be different.
     * ycondvals : list of tuples, optional
         List of values that the conditions of ycondnames must meet. This list
         must be as long as the number of lines to plot. If not specified,
         all lines are filtered by the conditions of filter parameter only,
         but in this case all ymetrics should be different.
     * xscale : ('linear' | 'log'), optional
         The scale of x axis, passed to ``plt.xscale``.
     * yscale : ('linear' | 'log'), optional
         The scale of y axis, passed to ``plt.yscale``.
     * xticks : list, optional
         Values to display as x-axis ticks.
     * yticks : list, optional
         Values to display as y-axis ticks.
     * line_style : dict, optional
         Dictionary mapping each value of yvals with a line style. Lines
         without an entry are drawn with '-'.
     * plot_args : dict, optional
         Additional args to be provided to the Pyplot errorbar function.
         Example parameters that can be specified here are *linewidth* and
         *elinewidth*
     * legend : dict, optional
         Dictionary mapping each value of yvals with a legend label. If not
         specified, the legend is not plotted. yvals is ycondvals when all
         ymetrics are equal, the (metric, condval) pairs when they differ, and
         ymetrics when no per-line conditions are given.
     * legend_loc : str, optional
         Legend location, e.g. 'upper left'
     * legend_args : dict, optional
         Optional legend arguments, such as ncol
     * plotempty : bool, optional
         If *True*, plot and save graph even if empty. Default is *True*
     * xmin, xmax: float, optional
        The limits of the x axis. If not specified, they're set to the min and
        max values of xvals
     * ymin, ymax: float, optional
        The limits of the y axis. If not specified, they're automatically
        selected by Matplotlib
    """
    # A single figure is created together with its axes, so that the figure
    # closed at the end is the one that was actually drawn on.
    fig, ax1 = plt.subplots()
    # try/finally guarantees the figure is released on every exit path
    # (normal completion, early return for empty plots, and exceptions).
    try:
        if 'title' in desc:
            plt.title(desc['title'])
        if 'xlabel' in desc:
            plt.xlabel(desc['xlabel'])
        if 'ylabel' in desc:
            plt.ylabel(desc['ylabel'])
        if 'xscale' in desc:
            plt.xscale(desc['xscale'])
        if 'yscale' in desc:
            plt.yscale(desc['yscale'])
        if 'filter' not in desc or desc['filter'] is None:
            desc['filter'] = {}
        xvals = sorted(desc['xvals'])
        if 'xticks' in desc:
            ax1.set_xticks(desc['xticks'])
            ax1.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
            ax1.set_xticklabels([str(xtick) for xtick in desc['xticks']])
        if 'yticks' in desc:
            ax1.set_yticks(desc['yticks'])
            ax1.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
            ax1.set_yticklabels([str(ytick) for ytick in desc['yticks']])
        ymetrics = desc['ymetrics']
        ycondnames = desc.get('ycondnames')
        ycondvals = desc.get('ycondvals')
        if ycondnames is not None and ycondvals is not None:
            if not len(ymetrics) == len(ycondnames) == len(ycondvals):
                raise ValueError('ymetrics, ycondnames and ycondvals must have the same length')
            # yvals is basically the list of values that differentiate each
            # line; it is used for legends and styles mainly. zip() is
            # materialised because yvals is measured and indexed below.
            if len(set(ymetrics)) == 1:
                yvals = ycondvals
            else:
                yvals = list(zip(ymetrics, ycondvals))
        else:
            yvals = ymetrics
        plot_args = desc.get('plot_args', {})
        plot_empty = desc.get('plotempty', True)
        confidence = desc.get('confidence', 0.95)
        show_errorbar = desc.get('errorbar', True)
        line_styles = desc.get('line_style', {})
        empty = True
        for i, yval in enumerate(yvals):
            metric = ymetrics[i]
            means = np.zeros(len(xvals))
            err = np.zeros(len(xvals))
            for j, xval in enumerate(xvals):
                condition = Tree(desc['filter'])
                condition.setval(desc['xparam'], xval)
                if ycondnames is not None:
                    condition.setval(ycondnames[i], ycondvals[i])
                values = (v.getval(metric)
                          for _, v in resultset.filter(condition))
                data = [val for val in values if val is not None]
                means[j], err[j] = means_confidence_interval(data, confidence)
            # Error bars are dropped when disabled or when every error is zero
            yerr = err if show_errorbar and not all(err == 0) else None
            fmt = line_styles[yval] if yval in line_styles else '-'
            # This check is to prevent crashing when trying to plot arrays of
            # nan values with axes log scale
            if all(np.isnan(x) for x in xvals) or all(np.isnan(y) for y in means):
                plt.errorbar([], [], fmt=fmt)
            else:
                plt.errorbar(xvals, means, yerr=yerr, fmt=fmt, **plot_args)
                empty = False
        if empty and not plot_empty:
            return
        x_min = desc['xmin'] if 'xmin' in desc else min(xvals)
        x_max = desc['xmax'] if 'xmax' in desc else max(xvals)
        plt.xlim(x_min, x_max)
        if 'ymin' in desc:
            plt.ylim(ymin=desc['ymin'])
        if 'ymax' in desc:
            plt.ylim(ymax=desc['ymax'])
        if 'legend' in desc:
            legend = [desc['legend'][yval] for yval in yvals]
            # Copy so that adding 'loc' does not modify the caller's dict
            legend_args = dict(desc.get('legend_args') or {})
            if 'legend_loc' in desc:
                legend_args['loc'] = desc['legend_loc']
            plt.legend(legend, prop={'size': LEGEND_SIZE}, **legend_args)
        plt.savefig(os.path.join(plotdir, filename), bbox_inches='tight')
    finally:
        plt.close(fig)
Ejemplo n.º 4
0
 def test_getset(self):
     """A value stored under a nested path is read back from the same path."""
     path = [1, 2, 3, 4]
     stored = 5
     nested = Tree()
     # Fresh list objects are passed to each call, as separate literals would be
     nested.setval(list(path), stored)
     retrieved = nested.getval(list(path))
     self.assertEqual(retrieved, stored)
Ejemplo n.º 5
0
def plot_lines(resultset, desc, filename, plotdir):
    """Plot a line graph described by a plot descriptor and save the figure.

    Each line shows, for every x value, the mean of a metric over the matching
    result entries, optionally with confidence-interval error bars.

    Parameters
    ----------
    resultset : ResultSet
        Result set; it is queried with ``resultset.filter(condition)`` and
        each returned entry is read with ``getval(metric_path)``.
    desc : dict
        The plot descriptor (more info below). If ``'filter'`` is missing or
        None it is set to ``{}`` in place.
    filename : str
        The name used to save the file. The file format is determined by the
        extension of the file. For example, if this filename is 'foo.pdf', the
        file will be saved in pdf format.
    plotdir : str
        The directory in which the plot will be saved.

    Raises
    ------
    ValueError
        If ycondnames and ycondvals are both given and their lengths differ
        from the length of ymetrics.
    KeyError
        If a mandatory key ('ymetrics', 'xvals', 'xparam') is missing, or if
        a legend is requested but lacks an entry for one of the lines.

    Notes
    -----
    The plot descriptor is a dictionary with a set of values that describe how
    to make the plot.

    The dictionary can contain the following keys:
     * title : str, optional.
           The title of the graph
     * xlabel : str, optional
         The x label
     * ylabel : str, optional
         The y label
     * errorbar : bool, optional
         If *True* error bars will be plotted. Default value is *True*.
         Error bars are also omitted for a line whose errors are all zero.
     * confidence : float, optional
         The confidence used to plot error bars. Default value is 0.95
     * xparam : iterable
         Path to the value of the x axis metric, e.g. ['workload', 'alpha']
     * xvals : list
         Range of x values, e.g. [0.6, 0.7, 0.8, 0.9]. The values are sorted
         before plotting.
     * filter : dict, optional
         A dictionary of values to filter in the resultset.
         Example: {'network_cache': 0.004, 'topology_name': 'GEANT'}
         If not specified or None, no filtering is executed on the results
         and possibly heterogeneous results may be plotted together
     * ymetrics : list of tuples
         List of metrics to be shown on the graph. The i-th metric of the list
         is the metric that the i-th line on the graph will represent. If
         all lines are for the same metric, then all elements of the list are
         equal.
         Each single metric (i.e. each element of the list) is a tuple modeling
         the path to identify a specific metric into an entry of a result set.
         Normally, it is a 2-value list where the first value is the name of
         the collector which measured the metric and the second value is the
         metric name. Example values could be ('CACHE_HIT_RATIO', 'MEAN'),
         ('LINK_LOAD', 'MEAN_INTERNAL') or ('LATENCY', 'MEAN').
         For example, if in a graph of 5 lines all lines of the graph show
         mean latency, then ymetrics = [('LATENCY', 'MEAN')]*5.
     * ycondnames : list of tuples, optional
         List of condition names specific to each line of the graph. Different
         from the conditions expressed in the filter parameter, which are
         global, these conditions are specific to one line. Each condition
         name is a path to a condition to be checked, e.g.
         ('topology', 'name'). Values to be matched for these conditions are
         specified in ycondvals. This list must be as long as the number of
         lines to plot. If not specified, all lines are filtered by the
         conditions of filter parameter only, but in this case all ymetrics
         should be different.
     * ycondvals : list of tuples, optional
         List of values that the conditions of ycondnames must meet. This list
         must be as long as the number of lines to plot. If not specified,
         all lines are filtered by the conditions of filter parameter only,
         but in this case all ymetrics should be different.
     * xscale : ('linear' | 'log'), optional
         The scale of x axis, passed to ``plt.xscale``.
     * yscale : ('linear' | 'log'), optional
         The scale of y axis, passed to ``plt.yscale``.
     * xticks : list, optional
         Values to display as x-axis ticks.
     * yticks : list, optional
         Values to display as y-axis ticks.
     * line_style : dict, optional
         Dictionary mapping each value of yvals with a line style. Lines
         without an entry are drawn with '-'.
     * plot_args : dict, optional
         Additional args to be provided to the Pyplot errorbar function.
         Example parameters that can be specified here are *linewidth* and
         *elinewidth*
     * legend : dict, optional
         Dictionary mapping each value of yvals with a legend label. If not
         specified, the legend is not plotted. yvals is ycondvals when all
         ymetrics are equal, the (metric, condval) pairs when they differ, and
         ymetrics when no per-line conditions are given.
     * legend_loc : str, optional
         Legend location, e.g. 'upper left'
     * legend_args : dict, optional
         Optional legend arguments, such as ncol
     * plotempty : bool, optional
         If *True*, plot and save graph even if empty. Default is *True*
     * xmin, xmax: float, optional
        The limits of the x axis. If not specified, they're set to the min and
        max values of xvals
     * ymin, ymax: float, optional
        The limits of the y axis. If not specified, they're automatically
        selected by Matplotlib
    """
    # A single figure is created together with its axes, so that the figure
    # closed at the end is the one that was actually drawn on.
    fig, ax1 = plt.subplots()
    # try/finally guarantees the figure is released on every exit path
    # (normal completion, early return for empty plots, and exceptions).
    try:
        if 'title' in desc:
            plt.title(desc['title'])
        if 'xlabel' in desc:
            plt.xlabel(desc['xlabel'])
        if 'ylabel' in desc:
            plt.ylabel(desc['ylabel'])
        if 'xscale' in desc:
            plt.xscale(desc['xscale'])
        if 'yscale' in desc:
            plt.yscale(desc['yscale'])
        if 'filter' not in desc or desc['filter'] is None:
            desc['filter'] = {}
        xvals = sorted(desc['xvals'])
        if 'xticks' in desc:
            ax1.set_xticks(desc['xticks'])
            ax1.get_xaxis().set_major_formatter(
                matplotlib.ticker.ScalarFormatter())
            ax1.set_xticklabels([str(xtick) for xtick in desc['xticks']])
        if 'yticks' in desc:
            ax1.set_yticks(desc['yticks'])
            ax1.get_yaxis().set_major_formatter(
                matplotlib.ticker.ScalarFormatter())
            ax1.set_yticklabels([str(ytick) for ytick in desc['yticks']])
        ymetrics = desc['ymetrics']
        ycondnames = desc.get('ycondnames')
        ycondvals = desc.get('ycondvals')
        if ycondnames is not None and ycondvals is not None:
            if not len(ymetrics) == len(ycondnames) == len(ycondvals):
                raise ValueError(
                    'ymetrics, ycondnames and ycondvals must have the same length')
            # yvals is basically the list of values that differentiate each
            # line; it is used for legends and styles mainly. zip() is
            # materialised because yvals is measured and indexed below.
            if len(set(ymetrics)) == 1:
                yvals = ycondvals
            else:
                yvals = list(zip(ymetrics, ycondvals))
        else:
            yvals = ymetrics
        plot_args = desc.get('plot_args', {})
        plot_empty = desc.get('plotempty', True)
        confidence = desc.get('confidence', 0.95)
        show_errorbar = desc.get('errorbar', True)
        line_styles = desc.get('line_style', {})
        empty = True
        for i, yval in enumerate(yvals):
            metric = ymetrics[i]
            means = np.zeros(len(xvals))
            err = np.zeros(len(xvals))
            for j, xval in enumerate(xvals):
                condition = Tree(desc['filter'])
                condition.setval(desc['xparam'], xval)
                if ycondnames is not None:
                    condition.setval(ycondnames[i], ycondvals[i])
                values = (v.getval(metric)
                          for _, v in resultset.filter(condition))
                data = [val for val in values if val is not None]
                means[j], err[j] = means_confidence_interval(data, confidence)
            # Error bars are dropped when disabled or when every error is zero
            yerr = err if show_errorbar and not all(err == 0) else None
            fmt = line_styles[yval] if yval in line_styles else '-'
            # This check is to prevent crashing when trying to plot arrays of
            # nan values with axes log scale
            if all(np.isnan(x) for x in xvals) or all(np.isnan(y) for y in means):
                plt.errorbar([], [], fmt=fmt)
            else:
                plt.errorbar(xvals, means, yerr=yerr, fmt=fmt, **plot_args)
                empty = False
        if empty and not plot_empty:
            return
        x_min = desc['xmin'] if 'xmin' in desc else min(xvals)
        x_max = desc['xmax'] if 'xmax' in desc else max(xvals)
        plt.xlim(x_min, x_max)
        if 'ymin' in desc:
            plt.ylim(ymin=desc['ymin'])
        if 'ymax' in desc:
            plt.ylim(ymax=desc['ymax'])
        if 'legend' in desc:
            legend = [desc['legend'][yval] for yval in yvals]
            # Copy so that adding 'loc' does not modify the caller's dict
            legend_args = dict(desc.get('legend_args') or {})
            if 'legend_loc' in desc:
                legend_args['loc'] = desc['legend_loc']
            plt.legend(legend, prop={'size': LEGEND_SIZE}, **legend_args)
        plt.savefig(os.path.join(plotdir, filename), bbox_inches='tight')
    finally:
        plt.close(fig)
Ejemplo n.º 6
0
def plot_bar_chart(resultset, desc, filename, plotdir):
    """Plot a bar chart with characteristics described in the plot descriptor
    out of the data contained in the resultset and save the plot in given
    directory.

    Parameters
    ----------
    resultset : ResultSet
        Result set
    desc : dict
        The plot descriptor (more info below). It is not modified.
    filename : str
        The name used to save the file. The file format is determined by the
        extension of the file. For example, if this filename is 'foo.pdf', the
        file will be saved in pdf format.
    plotdir : str
        The directory in which the plot will be saved.

    Raises
    ------
    ValueError
        If ymetrics, ycondnames and ycondvals do not have the same length or
        if a custom placement does not sum up to the number of bars.

    Notes
    -----
    The plot descriptor is a dictionary with a set of values that describe how
    to make the plot.

    The dictionary can contain the following keys:
     * title : str, optional.
         The title of the graph
     * xlabel : str, optional
         The x label
     * ylabel : str, optional
         The y label
     * errorbar : bool, optional
         If *True* error bars will be plotted. Default value is *True*
     * confidence : float, optional
         The confidence used to plot error bars. Default value is 0.95
     * filter : tree or dict of dicts, optional
         A tree or nested dictionary of values to include from the resultset.
         Example: {'cache_placement': {'network_cache': 0.004},
         'topology': {'name': 'GEANT'}}.
         If not specified or None, no filtering is executed on the results
         and possibly heterogeneous results may be plotted together.
     * xparam : tuple
         The path of the x axis metric, e.g. ('workload', 'alpha')
     * xvals : list
         Range of x values, e.g. [0.6, 0.7, 0.8, 0.9]
     * xticks : list, optional
         Names to display as ticks. If not specified, xvals is used instead
     * ymetrics : list of tuples
         List of metrics to be shown on the graph. The i-th metric of the list
         is the metric that the i-th bar on the graph will represent. If
         all bars are for the same metric, then all elements of the list are
         equal.
         Each single metric (i.e. each element of the list) is a tuple modeling
         the path to identify a specific metric into an entry of a result set.
         Normally, it is a 2-value list where the first value is the name of
         the collector which measured the metric and the second value is the
         metric name. Example values could be ('CACHE_HIT_RATIO', 'MEAN'),
         ('LINK_LOAD', 'MEAN_INTERNAL') or ('LATENCY', 'MEAN').
         For example, if in a graph of N bars all bars of the graph show mean
         latency, then ymetrics = [('LATENCY', 'MEAN')]*N.
     * ycondnames : list of tuples, optional
         List of condition names specific to each bar of the graph. Different
         from the conditions expressed in the filter parameter, which are
         global, these conditions are specific to one bar. Each condition
         name, different from the filter parameter, is a path to a condition
         to be checked, e.g. ('topology', 'name'). Values to be matched for
         these conditions are specified in ycondvals. This list must be as
         long as the number of bars to plot. If not specified, all bars are
         filtered by the conditions of filter parameter only, but in this case
         all ymetrics should be different.
     * ycondvals : list of values, optional
         List of values that the conditions of ycondnames must meet. This list
         must be as long as the number of bars to plot. If not specified,
         all bars are filtered by the conditions of filter parameter only,
         but in this case all ymetrics should be different.
     * placement : (grouped | stacked | [x, y, ...]), optional
         Defines how to place bars in the plot. If 'grouped' (the default),
         all bars for a specific xval are placed next to each other; if
         'stacked', they are plotted on top of each other. It is also
         possible to specify a custom grouped+stacked placement with a list of
         integers, in which the number of items is the number of columns and
         the value of an item is the number of metrics stacked on that column.
         For example [4,2,3] means plotting 4 + 2 + 3 = 9 metrics: 4 stacked
         in the first column, 2 stacked on the second and 3 stacked on the
         third.
     * group_width : float, default: 0.4
         Width of a group of bars
     * bar_color : dict, optional
         Dictionary mapping each value of yvals with a bar color. It is used
         only if it covers all values of yvals, otherwise colors are taken
         in order from BW_COLOR_CATALOGUE (if it has enough entries).
     * bar_hatch : dict, optional
         Dictionary mapping each value of yvals with a bar hatch. If set to
         None all bars will be plotted without hatch. If not set (or if it
         does not cover all values of yvals), hatches are taken in order from
         HATCH_CATALOGUE (if it has enough entries).
     * legend : dict, optional
         Dictionary mapping each value of yvals with a legend label. If not
         specified, the legend is not plotted.
     * legend_loc : str, optional
         Legend location, e.g. 'upper left'
     * legend_args : dict, optional
         Optional legend arguments, such as ncol
     * plotempty : bool, optional
         If *True*, plot and save graph even if empty. Default is *True*
     * ymax: float, optional
         The upper limit of the y axis. If not specified, it is automatically
         selected by Matplotlib
    """
    # Work on a local default rather than writing a 'filter' key back into the
    # caller's descriptor.
    filter_spec = desc.get('filter')
    if filter_spec is None:
        filter_spec = {}
    plot_empty = desc.get('plotempty', True)
    confidence = desc.get('confidence', 0.95)
    show_errorbar = desc.get('errorbar', True)

    ymetrics = desc['ymetrics']
    ycondnames = desc.get('ycondnames')
    ycondvals = desc.get('ycondvals')
    if ycondnames is not None and ycondvals is not None:
        if not len(ymetrics) == len(ycondnames) == len(ycondvals):
            raise ValueError(
                'ymetrics, ycondnames and ycondvals must have the same length')
        # yvals is basically the list of values that differentiate each bar
        # it is used for legends and styles mainly.
        # zip() must be materialized: yvals is measured with len() and
        # searched with index() below.
        if len(set(ymetrics)) == 1:
            yvals = ycondvals
        else:
            yvals = list(zip(ymetrics, ycondvals))
    else:
        yvals = ymetrics

    placement = desc.get('placement', 'grouped')
    if placement == 'grouped':
        placement = [1] * len(yvals)
    elif placement == 'stacked':
        placement = [len(yvals)]
    elif sum(placement) != len(yvals):
        raise ValueError('Placement definition incorrect. '
                         'The sum of values of the list must be equal to '
                         'the number of y values')
    xvals = desc['xvals']
    xticks = desc.get('xticks', xvals)

    # Spacing attributes
    group_width = desc.get('group_width', 0.4)  # width of a group of bars
    width = group_width / len(placement)  # width of a single bar
    separation = width / 2  # space between adjacent groups
    border = 0.6 * separation  # left and right borders

    # Select colors and hatches
    if 'bar_color' in desc and all(y in desc['bar_color'] for y in yvals):
        color = desc['bar_color']
    elif len(yvals) <= len(BW_COLOR_CATALOGUE):
        color = {y: BW_COLOR_CATALOGUE[yvals.index(y)] for y in yvals}
    else:
        color = collections.defaultdict(lambda: None)
    if 'bar_hatch' in desc and desc['bar_hatch'] is None:
        hatch = collections.defaultdict(lambda: None)
    elif 'bar_hatch' in desc and all(y in desc['bar_hatch'] for y in yvals):
        hatch = desc['bar_hatch']
    elif len(yvals) <= len(HATCH_CATALOGUE):
        # Guard on the catalogue that is actually indexed
        hatch = {y: HATCH_CATALOGUE[yvals.index(y)] for y in yvals}
    else:
        hatch = collections.defaultdict(lambda: None)

    # Create figure and axes in a single call so that the figure that is
    # decorated, saved and closed is the same one the bars are drawn on.
    fig, ax1 = plt.subplots()
    try:
        if 'title' in desc:
            plt.title(desc['title'])
        # The visibility flag is passed positionally because its keyword name
        # differs across Matplotlib versions.
        plt.grid(True, which='major', color='k', axis='y', linestyle=':')
        # Set axis below bars
        ax1.set_axisbelow(True)
        if 'xlabel' in desc:
            plt.xlabel(desc['xlabel'])
        if 'ylabel' in desc:
            plt.ylabel(desc['ylabel'])

        elem = collections.defaultdict(int)  # bar objects (for legend)
        empty = True
        # Plot bars
        left = border  # left-most point of the bar about to draw
        for xval in xvals:
            bar_idx = 0  # index of the bar (in yvals) about to draw
            for stack_size in placement:
                # Bottom point of a bar. It stays 0 unless bars are stacked
                bottom = 0
                for _ in range(stack_size):
                    condition = Tree(filter_spec)
                    condition.setval(desc['xparam'], xval)
                    if ycondnames is not None:
                        condition.setval(ycondnames[bar_idx],
                                         ycondvals[bar_idx])
                    metric = ymetrics[bar_idx]
                    data = []
                    for _, entry in resultset.filter(condition):
                        value = entry.getval(metric)
                        if value is not None:
                            data.append(value)
                    meanval, err = means_confidence_interval(data, confidence)
                    yerr = err if show_errorbar else None
                    if not np.isnan(meanval):
                        empty = False
                    yval = yvals[bar_idx]
                    elem[yval] = plt.bar(left,
                                         meanval,
                                         width,
                                         color=color[yval],
                                         yerr=yerr,
                                         bottom=bottom,
                                         ecolor='k',
                                         hatch=hatch[yval],
                                         label=yval)
                    bottom += meanval
                    bar_idx += 1
                left += width
            left += separation
        if empty and not plot_empty:
            return
        n_bars = len(placement)
        plt.xticks(
            border + 0.5 * (n_bars * width) +
            (separation + n_bars * width) * np.arange(len(xticks)), xticks)
        if 'legend' in desc:
            legend = [desc['legend'][yval] for yval in yvals]
            # Copy so that adding 'loc' does not alter the caller's dict
            legend_args = dict(desc.get('legend_args', {}))
            if 'legend_loc' in desc:
                legend_args['loc'] = desc['legend_loc']
            plt.legend([elem[yval] for yval in yvals],
                       legend,
                       prop={'size': LEGEND_SIZE},
                       **legend_args)
        xmin, _ = plt.xlim()
        plt.xlim(xmin, left - separation + border)
        if 'ymax' in desc:
            plt.ylim(ymax=desc['ymax'])
        plt.savefig(os.path.join(plotdir, filename), bbox_inches='tight')
    finally:
        # Release the figure on every exit path (normal, early return, error)
        plt.close(fig)
Ejemplo n.º 7
0
def plot_cdf(resultset, desc, filename, plotdir):
    """Plot a CDF with characteristics described in the plot descriptor
    out of the data contained in the resultset and save the plot in given
    directory.

    Parameters
    ----------
    resultset : ResultSet
        Result set
    desc : dict
        The plot descriptor (more info below). It is not modified.
    filename : str
        The name used to save the file. The file format is determined by the
        extension of the file. For example, if this filename is 'foo.pdf', the
        file will be saved in pdf format.
    plotdir : str
        The directory in which the plot will be saved.

    Raises
    ------
    ValueError
        If ymetrics, ycondnames and ycondvals do not have the same length.

    Notes
    -----
    The plot descriptor is a dictionary with a set of values that describe how
    to make the plot.

    The dictionary can contain the following keys:
     * title : str, optional.
         The title of the graph
     * xlabel : str, optional
         The x label
     * ylabel : str, optional
         The y label. The default value is 'Cumulative probability'
     * filter : tree or dict, optional
         A tree or dictionary of values to filter in the resultset.
         If not specified or None, no filtering is executed on the results
         and possibly heterogeneous results may be plotted together
     * ymetrics : list of tuples
         List of metrics to be shown on the graph. The i-th metric of the list
         is the metric that the i-th line on the graph will represent. If
         all lines are for the same metric, then all elements of the list are
         equal.
         Each single metric (i.e. each element of the list) is a tuple modeling
         the path to identify a specific metric into an entry of a result set.
         Normally, it is a 2-value list where the first value is the name of
         the collector which measured the metric and the second value is the
         metric name. The metric must be a CDF, i.e. a pair (x values,
         y values). An example value could be ('LATENCY', 'CDF').
     * ycondnames : list of tuples, optional
         List of condition names specific to each line of the graph. Different
         from the conditions expressed in the filter parameter, which are
         global, these conditions are specific to one line. Each condition
         name, different from the filter parameter, is a path to a condition
         to be checked, e.g. ('topology', 'name'). Values to be matched for
         these conditions are specified in ycondvals. This list must be as
         long as the number of lines to plot. If not specified, all lines are
         filtered by the conditions of filter parameter only, but in this case
         all ymetrics should be different.
     * ycondvals : list of values, optional
         List of values that the conditions of ycondnames must meet. This list
         must be as long as the number of lines to plot. If not specified,
         all lines are filtered by the conditions of filter parameter only,
         but in this case all ymetrics should be different.
     * xscale : str, optional
         The scale of x axis, passed to Matplotlib (e.g. 'linear' or 'log').
     * yscale : str, optional
         The scale of y axis, passed to Matplotlib (e.g. 'linear' or 'log').
     * step : bool, optional
         If *True* draws the CDF with steps. Default value is *True*
     * line_style : dict, optional
         Dictionary mapping each value of yvals with a line style. Lines
         without an entry are drawn with style '-'.
     * legend : dict, optional
         Dictionary mapping each value of yvals with a legend label. If not
         specified, the legend is not plotted.
     * legend_loc : str, optional
         Legend location, e.g. 'upper left'
     * legend_args : dict, optional
         Optional legend arguments, such as ncol
     * plotempty : bool, optional
         If *True*, plot and save graph even if empty. Default is *True*
    """
    # Work on a local default rather than writing a 'filter' key back into the
    # caller's descriptor.
    filter_spec = desc.get('filter')
    if filter_spec is None:
        filter_spec = {}
    step = desc.get('step', True)
    plot_empty = desc.get('plotempty', True)
    ymetrics = desc['ymetrics']
    ycondnames = desc.get('ycondnames')
    ycondvals = desc.get('ycondvals')
    if ycondnames is not None and ycondvals is not None:
        if not len(ymetrics) == len(ycondnames) == len(ycondvals):
            raise ValueError(
                'ymetrics, ycondnames and ycondvals must have the same length')
        # yvals is basically the list of values that differentiate each line
        # it is used for legends and styles mainly.
        # zip() must be materialized: yvals is measured with len() and
        # iterated more than once below.
        if len(set(ymetrics)) == 1:
            yvals = ycondvals
        else:
            yvals = list(zip(ymetrics, ycondvals))
    else:
        yvals = ymetrics
    line_style = desc.get('line_style', {})

    fig = plt.figure()
    try:
        if 'title' in desc:
            plt.title(desc['title'])
        if 'xlabel' in desc:
            plt.xlabel(desc['xlabel'])
        plt.ylabel(desc.get('ylabel', 'Cumulative probability'))
        if 'xscale' in desc:
            plt.xscale(desc['xscale'])
        if 'yscale' in desc:
            plt.yscale(desc['yscale'])

        x_min = np.inf
        x_max = -np.inf
        empty = True
        for i, yval in enumerate(yvals):
            condition = Tree(filter_spec)
            if ycondnames is not None:
                condition.setval(ycondnames[i], ycondvals[i])
            data = []
            for _, entry in resultset.filter(condition):
                value = entry.getval(ymetrics[i])
                if value is not None:
                    data.append(value)
            # If there are more than 1 CDFs in the resultset, take the first
            if data:
                x_cdf, y_cdf = data[0]
                if step:
                    x_cdf, y_cdf = step_cdf(x_cdf, y_cdf)
            else:
                x_cdf, y_cdf = [], []
            fmt = line_style[yval] if yval in line_style else '-'
            # This check is to prevent crashing when trying to plot arrays of
            # nan values with axes log scale. It also covers the no-data case,
            # since all() over an empty sequence is True.
            if all(np.isnan(x) for x in x_cdf) or \
                    all(np.isnan(y) for y in y_cdf):
                plt.plot([], [], fmt)
            else:
                plt.plot(x_cdf, y_cdf, fmt)
                empty = False
                x_min = min(x_min, x_cdf[0])
                x_max = max(x_max, x_cdf[-1])
        if empty and not plot_empty:
            return
        # The limits are still infinite if nothing was plotted; in that case
        # leave the axis to Matplotlib's defaults.
        if np.isfinite(x_min) and np.isfinite(x_max):
            plt.xlim(x_min, x_max)
        if 'legend' in desc:
            legend = [desc['legend'][yval] for yval in yvals]
            # Copy so that adding 'loc' does not alter the caller's dict
            legend_args = dict(desc.get('legend_args', {}))
            if 'legend_loc' in desc:
                legend_args['loc'] = desc['legend_loc']
            plt.legend(legend, prop={'size': LEGEND_SIZE}, **legend_args)
        plt.savefig(os.path.join(plotdir, filename), bbox_inches='tight')
    finally:
        # Release the figure on every exit path (normal, early return, error)
        plt.close(fig)
Ejemplo n.º 8
0
 def test_getset(self):
     """Check that a value stored with setval at a nested path is returned
     by getval for the same path.
     """
     tree = Tree()
     tree.setval([1, 2, 3, 4], 5)
     self.assertEqual(tree.getval([1, 2, 3, 4]), 5)