Example #1
0
def pairs_lines(results, 
                outcomes_to_show = [],
                group_by = None,
                grouping_specifiers = None,
                ylabels = {},
                legend=True,
                **kwargs):
    '''
    
    Generate a `R style pairs <http://www.stat.psu.edu/~dhunter/R/html/graphics/html/pairs.html>`_ 
    lines multiplot. It shows the behavior of two outcomes over time against
    each other. The origin is denoted with a circle and the end is denoted
    with a '+'. 
    
    :param results: return from perform_experiments.
    :param outcomes_to_show: list of outcome of interest you want to plot. If 
                             empty, all outcomes are plotted.
    :param group_by: name of the column in the cases array to group results by.
                     Alternatively, `index` can be used to use indexing arrays 
                     as the basis for grouping.
    :param grouping_specifiers: set of categories to be used as a basis for 
                                grouping by. Grouping_specifiers is only 
                                meaningful if group_by is provided as well. In
                                case of grouping by index, the grouping 
                                specifiers should be in a dictionary where the
                                key denotes the name of the group. 
    :param ylabels: ylabels is a dictionary with the outcome names as keys, the 
                    specified values will be used as labels for the y axis. 
    :param legend: boolean, if true, and there is a column specified for 
                   grouping, show a legend.
    :param point_in_time: the point in time at which the scatter is to be made.
                          If None is provided, the end states are used. 
                          point_in_time should be a valid value on time
    :rtype: a `figure <http://matplotlib.sourceforge.net/api/figure_api.html>`_ instance
            and a dict with the individual axes.
    
    '''
    
    #unravel return from run_experiments   
    debug("making a pars lines plot")
    
    prepared_data = prepare_pairs_data(results, outcomes_to_show, group_by, 
                                       grouping_specifiers, None)
    outcomes, outcomes_to_show, grouping_labels = prepared_data
    
    grid = gridspec.GridSpec(len(outcomes_to_show), len(outcomes_to_show))                             
    grid.update(wspace = 0.1,
                hspace = 0.1)
    
    #the plotting
    figure = plt.figure()
    axes_dict = {}
    
    if group_by and legend:
        make_legend(grouping_labels, figure)
     
    combis = [(field1, field2) for field1 in outcomes_to_show\
                               for field2 in outcomes_to_show]
    
    for field1, field2 in combis:
        i = outcomes_to_show.index(field1)
        j = outcomes_to_show.index(field2)
        ax = figure.add_subplot(grid[i,j])
        axes_dict[(field1, field2)] = ax

        if group_by:
            for x, entry in enumerate(grouping_labels):
                data1 = outcomes[entry][field1]
                data2 = outcomes[entry][field2]
                color = COLOR_LIST[x]
                if i==j: 
                    color = 'white'
                simple_pairs_lines(ax, data1, data2, color)
        else:
            data1 = outcomes[field1]
            data2 = outcomes[field2]
            color = 'b'
            if i==j: 
                color = 'white'
            simple_pairs_lines(ax, data1, data2, color)
        do_text_ticks_labels(ax, i, j, field1, field2, ylabels, 
                             outcomes_to_show)
            

    return figure, axes_dict
Example #2
0
def pairs_scatter(results, 
                  outcomes_to_show = [],
                  group_by = None,
                  grouping_specifiers = None,
                  ylabels = {},
                  legend=True,
                  point_in_time=-1,
                  filter_scalar=True,
                  **kwargs):
    '''
    
    Generate a `R style pairs <http://www.stat.psu.edu/~dhunter/R/html/graphics/html/pairs.html>`_ 
    scatter multiplot. In case of time-series data, the end states are used.
    
    :param results: return from perform_experiments.
    :param outcomes_to_show: list of outcome of interest you want to plot. If 
                             empty, all outcomes are plotted.
    :param group_by: name of the column in the cases array to group results by.
                     Alternatively, `index` can be used to use indexing arrays 
                     as the basis for grouping.
    :param grouping_specifiers: set of categories to be used as a basis for 
                                grouping by. Grouping_specifiers is only 
                                meaningful if group_by is provided as well. In
                                case of grouping by index, the grouping 
                                specifiers should be in a dictionary where the
                                key denotes the name of the group. 
    :param ylabels: ylabels is a dictionary with the outcome names as keys, the 
                    specified values will be used as labels for the y axis. 
    :param legend: boolean, if true, and there is a column specified for 
                   grouping, show a legend.
    :param point_in_time: the point in time at which the scatter is to be made.
                          If None is provided, the end states are used. 
                          point_in_time should be a valid value on time
    :param filter_scalar: boolean, remove the non-time-series outcomes.  
                          Defaults to True.
    :rtype: a `figure <http://matplotlib.sourceforge.net/api/figure_api.html>`_ instance
            and a dict with the individual axes.
    

    .. note:: the current implementation is limited to seven different 
          categories in case of column, categories, and/or discretesize.
          This limit is due to the colors specified in COLOR_LIST. 
    '''
    
    debug("generating pairwise scatter plot")
   
    prepared_data = prepare_pairs_data(results, outcomes_to_show, group_by, 
                                       grouping_specifiers, point_in_time,
                                       filter_scalar)
    outcomes, outcomes_to_show, grouping_labels = prepared_data
   
    grid = gridspec.GridSpec(len(outcomes_to_show), len(outcomes_to_show))                             
    grid.update(wspace = 0.1,
                hspace = 0.1)    
    
    
    #the plotting
    figure = plt.figure()
    axes_dict = {}
    
    if group_by and legend:
        make_legend(grouping_labels, figure, legend_type='scatter')
     
    combis = [(field1, field2) for field1 in outcomes_to_show\
                               for field2 in outcomes_to_show]
    
    for field1, field2 in combis:
        i = outcomes_to_show.index(field1)
        j = outcomes_to_show.index(field2)
        ax = figure.add_subplot(grid[i,j])
        axes_dict[(field1, field2)] = ax

        if group_by:
            for x, group in enumerate(grouping_labels):
                y_data = outcomes[group][field1]
                x_data = outcomes[group][field2]
                
                facecolor = COLOR_LIST[x]
                edgecolor = 'k'
                if i==j: 
                    facecolor = 'white'
                    edgecolor = 'white'
                ax.scatter(x_data, y_data, 
                           facecolor=facecolor, edgecolor=edgecolor)
        else:
            y_data = outcomes[field1]
            x_data = outcomes[field2]

            facecolor = 'b'
            edgecolor = 'k'
            if i==j: 
                facecolor = 'white'
                edgecolor = 'white'
            ax.scatter(x_data, y_data, 
                       facecolor=facecolor, edgecolor=edgecolor)
        do_text_ticks_labels(ax, i, j, field1, field2, ylabels, 
                             outcomes_to_show)

    return figure, axes_dict
Example #3
0
def pairs_density(results, 
                  outcomes_to_show = [],
                  group_by = None,
                  grouping_specifiers = None,
                  ylabels = {},
                  point_in_time=-1,
                  log=True,
                  gridsize=50,
                  colormap='jet',
                  filter_scalar=True): 
    '''
    
    Generate a `R style pairs <http://www.stat.psu.edu/~dhunter/R/html/graphics/html/pairs.html>`_ 
    hexbin density multiplot. In case of time-series data, the end states are 
    used.
    
    hexbin makes hexagonal binning plot of x versus y, where x, y are 1-D 
    sequences of the same length, N. If C is None (the default), this is a 
    histogram of the number of occurences of the observations at (x[i],y[i]).
    For further detail see `matplotlib on hexbin <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.hexbin>`_
    
    :param results: return from perform_experiments.
    :param outcomes_to_show: list of outcome of interest you want to plot. If 
                             empty, all outcomes are plotted.
    :param group_by: name of the column in the cases array to group results by.
                     Alternatively, `index` can be used to use indexing arrays 
                     as the basis for grouping.
    :param grouping_specifiers: set of categories to be used as a basis for 
                                grouping by. Grouping_specifiers is only 
                                meaningful if group_by is provided as well. In
                                case of grouping by index, the grouping 
                                specifiers should be in a dictionary where the
                                key denotes the name of the group. 
    :param ylabels: ylabels is a dictionary with the outcome names as keys, the 
                    specified values will be used as labels for the y axis. 
    :param point_in_time: the point in time at which the scatter is to be made.
                          If None is provided, the end states are used. 
                          point_in_time should be a valid value on time
    :param log: boolean, indicating whether density should be log scaled. 
                Defaults to True.
    :param gridsize: controls the gridsize for the hexagonal binning
    :param cmap: color map that is to be used in generating the hexbin. For 
                 details on the available maps, 
                 see `pylab <http://matplotlib.sourceforge.net/examples/pylab_examples/show_colormaps.html#pylab-examples-show-colormaps>`_.
                 (Defaults = jet)
    :param filter_scalar: boolean, remove the non-time-series outcomes.  
                          Defaults to True.
    :rtype: a `figure <http://matplotlib.sourceforge.net/api/figure_api.html>`_ instance
            and a dict with the individual axes.
    
    '''
    debug("generating pairwise density plot")
    
    prepared_data = prepare_pairs_data(results, outcomes_to_show, group_by, 
                                       grouping_specifiers, point_in_time,
                                       filter_scalar)
    outcomes, outcomes_to_show, grouping_specifiers = prepared_data
   
    if group_by:
        #figure out the extents for each combination
        extents = determine_extents(outcomes, outcomes_to_show)
        
        axes_dicts = {}
        figures = []
        for key, value in outcomes.items():
            figure, axes_dict = simple_pairs_density(value, outcomes_to_show, 
                                       log, colormap, gridsize, ylabels,
                                       extents=extents, title=key)
            axes_dicts[key] = axes_dict
            figures.append(figure)
        
        # harmonize the color scaling across figures
        combis = [(field1, field2) for field1 in outcomes_to_show\
                           for field2 in outcomes_to_show]
        for combi in combis:
            vmax = -1
            for entry in axes_dicts.values():
                vmax =  max(entry[combi].collections[0].norm.vmax, vmax)
            for entry in axes_dicts.values():
                ax = entry[combi]
                ax.collections[0].set_clim(vmin=0, vmax=vmax)
            del vmax
            
        return figures, axes_dicts
    else:
        return simple_pairs_density(outcomes, outcomes_to_show, log, colormap, 
                                    gridsize, ylabels)