Example #1
0
def get_vpstats(dataset, points=500, bw_method=None):
    """Compute violin-plot statistics for ``dataset`` using a Gaussian KDE.

    Thin wrapper around ``cbook.violin_stats`` that supplies a density
    estimator built on ``mlab.GaussianKDE``.

    Parameters
    ----------
    dataset
        Passed through unchanged to ``cbook.violin_stats``.
    points : int, optional
        Forwarded as the ``points`` keyword of ``cbook.violin_stats``.
    bw_method
        Forwarded as the second argument of ``mlab.GaussianKDE``.

    Returns
    -------
    Whatever ``cbook.violin_stats`` returns for these arguments.
    """

    def _density(sample, grid):
        first = sample[0]
        # Constant sample: skip the KDE and flag the grid positions that
        # coincide with the single value instead.
        if np.all(first == sample):
            return (first == grid).astype(float)
        return mlab.GaussianKDE(sample, bw_method).evaluate(grid)

    return cbook.violin_stats(dataset, _density, points=points)
Example #2
0
def get_violinstats(dataset, points=100, bw_method=None):
    """Return ``cbook.violin_stats`` for ``dataset`` with a Gaussian KDE.

    Parameters
    ----------
    dataset
        Handed to ``cbook.violin_stats`` as-is.
    points : int, optional
        Forwarded as the ``points`` keyword of ``cbook.violin_stats``.
    bw_method
        Second argument given to ``mlab.GaussianKDE``.

    Returns
    -------
    The value produced by ``cbook.violin_stats``.
    """

    def _estimate(values, where):
        is_constant = np.all(values[0] == values)
        if not is_constant:
            estimator = mlab.GaussianKDE(values, bw_method)
            return estimator.evaluate(where)
        # Degenerate input (every entry equals the first one): return 1.0
        # where the grid equals that value and 0.0 elsewhere.
        return (values[0] == where).astype(float)

    stats = cbook.violin_stats(dataset, _estimate, points=points)
    return stats
Example #3
0
def plot_bindist(obs_table, b, base_pdf=None):
    """Plot the distribution of one bin of an observable for every result.

    This is a generator: it builds a single figure and yields it once.

    Parameters
    ----------
    obs_table
        Table exposing ``Observable``, ``PDF`` and ``Result`` columns
        (accessed as attributes). It must contain exactly one distinct
        observable.
    b : int
        Zero-based bin index; the title shows ``b + 1``.
    base_pdf : optional
        When truthy, the first row whose ``PDF`` equals ``base_pdf`` provides
        the reference result; its ``central_value`` is passed as ``rel_to``
        to each result's ``_violin_data`` and ``base_pdf.label`` is used in
        the x-axis label.

    Yields
    ------
    tuple
        ``((obs, b), figure)`` where ``obs`` is the single observable.

    Raises
    ------
    ValueError
        If ``obs_table`` holds more or fewer than one distinct observable.
    """

    def _kde_method(X, coords):
        kde = mlab.GaussianKDE(X, None)
        return kde.evaluate(coords)

    obs = obs_table.Observable.unique()
    if len(obs) != 1:
        raise ValueError("Must be only one observable")
    obs = obs[0]
    figure, ax = plt.subplots()

    plt.title("%s [Bin %d]" % (obs, b+1))
    colors = plotutils.color_names_to_rgb(colorlist)
    # Float on purpose: keeps ``alpha /= 2`` fractional even where ``/`` is
    # integer division (Python 2 without ``from __future__ import division``).
    alpha = 1.0

    if base_pdf:
        # ``.values`` replaces ``Series.get_values()`` / ``.as_matrix()``,
        # both removed in pandas 1.0; ``.iloc[0]`` takes the first matching
        # row by position instead of requiring a row labelled 0.
        matching = obs_table[obs_table.PDF.values == base_pdf]
        base = matching.Result.iloc[0]
        reference = base.central_value.values
    else:
        base = None
        reference = None

    for result in obs_table.Result.unique():
        if base is not None:
            data = result._violin_data(rel_to=reference)
        else:
            data = result._violin_data()

        # A list is taken to be precomputed per-bin statistics; anything
        # else is run through violin_stats first.
        if isinstance(data, list):
            stats = data[b]
        else:
            stats = violin_stats(data, _kde_method)[b]

        color = next(colors)
        # assumes ``color`` is an RGB tuple so that appending alpha gives RGBA
        alphacolor = color + (alpha,)
        plt.plot(stats['coords'], stats['vals'], color=color,
                 label=result.pdf.label)
        plt.fill_between(stats['coords'], 0, stats['vals'], color=alphacolor)

        # Each successive result is filled half as opaque as the previous one.
        alpha /= 2

    plt.ylabel("Distribution")
    if base_pdf:
        plt.xlabel('Ratio to %s' % base_pdf.label)
    else:
        plt.xlabel("Observable value")

    ax.yaxis.set_major_locator(MaxNLocator(nbins=10, prune="both"))
    ax.xaxis.set_major_locator(MaxNLocator(nbins=8, prune="both"))

    plt.legend()
    yield (obs, b), figure
Example #4
0
def custom_violin_stats(data, weights):
    """Return ``violin_stats`` output with weighted mean and median.

    The median comes from ``wquantiles.quantile_1D`` at the 0.5 quantile and
    the mean from ``np.ma.average``; both overwrite the corresponding entries
    of the first statistics dictionary.

    Parameters
    ----------
    data
        Sample values, passed to ``violin_stats`` and to both weighted
        estimators.
    weights
        Per-sample weights; also used to build the density callable via
        ``vdensity_with_weights``.

    Returns
    -------
    The object returned by ``violin_stats``, with ``results[0]["mean"]`` and
    ``results[0]["median"]`` replaced. Other entries are left as
    ``violin_stats`` produced them.
    """
    weighted_median = wquantiles.quantile_1D(data, weights, 0.5)
    # returned=True also yields the sum of weights, which is not needed here.
    weighted_mean, _ = np.ma.average(
        data, weights=list(weights), returned=True
    )

    # violin_stats expects a callable taking (data, coords);
    # vdensity_with_weights(weights) supplies it.
    density = vdensity_with_weights(weights)
    results = violin_stats(data, density)

    first = results[0]
    first[u"mean"] = weighted_mean
    first[u"median"] = weighted_median
    return results
def custom_violin_stats(data, weights):
    """Compute violin statistics whose mean and median honour ``weights``.

    Taken from
    https://colab.research.google.com/drive/1cSnJGKJEqbllkPbF2z0cnfdwT40sUKKR#scrollTo=RIcLIr5XJRmx

    Parameters
    ----------
    data
        Sample values, passed to ``violin_stats`` and to both weighted
        estimators.
    weights
        Per-sample weights; also handed to ``vdensity_with_weights`` to
        build the density callable.

    Returns
    -------
    The object returned by ``violin_stats``, after ``results[0]["mean"]`` and
    ``results[0]["median"]`` have been overwritten with the weighted values.
    """
    # Weighted median via the ``weighted`` module (0.5 quantile).
    median_w = weighted.quantile_1D(data, weights, 0.5)
    # The second return value (sum of weights) is intentionally discarded.
    mean_w, _ = np.ma.average(data, weights=list(weights), returned=True)

    # violin_stats wants a function of (data, coords); the weights are bound
    # into it by vdensity_with_weights.
    results = violin_stats(data, vdensity_with_weights(weights))

    # Only mean and median are replaced; the remaining entries are whatever
    # violin_stats produced.
    head = results[0]
    head[u"mean"] = mean_w
    head[u"median"] = median_w

    return results
Example #6
0
def violin_plot(data, normvalues=None, ax=None, bw_method=None, **kwargs):
    """Draw violins on ``ax`` and return them with a legend handle.

    Each violin is drawn twice with ``ax.violin``: once as a filled body and
    once as an outline, so the fill and the edge can be styled separately.

    Parameters
    ----------
    data
        Either a list of precomputed statistics dictionaries (each needs at
        least a ``'vals'`` entry here) or raw data, which is run through
        ``violin_stats`` with a Gaussian KDE.
    normvalues : scalar or sequence, optional
        When given, the width of each violin is ``normvalue * max(vals)``;
        this overrides any ``widths`` keyword.
    ax : optional
        Axes-like object providing ``violin``. Defaults to the current
        pyplot axes.
    bw_method : optional
        Second argument of ``mlab.GaussianKDE``.
    **kwargs
        ``color``, ``label`` and ``hatches`` are consumed here; everything
        else (including ``widths``) is forwarded to ``ax.violin``.
        NOTE(review): ``color`` is sliced with ``[:3]`` and checked for
        length 4, so it looks like an RGB/RGBA sequence is expected rather
        than a colour name - confirm with callers.

    Returns
    -------
    tuple
        ``(vp, handle, ournorms)``: the result of the first ``ax.violin``
        call, a ``mpatches.Patch`` for the legend (``None`` without a
        label), and the list ``width / max(vals)`` per violin.

    Raises
    ------
    ValueError
        If ``normvalues`` or ``widths`` is a sequence whose length differs
        from the number of violins.
    """

    def _kde_method(X, coords):
        kde = mlab.GaussianKDE(X, bw_method)
        return kde.evaluate(coords)

    def _has_value(value):
        # Plain truthiness raises for multi-element numpy arrays, so test
        # emptiness explicitly where the object has a length.
        if value is None:
            return False
        try:
            return len(value) > 0
        except TypeError:
            return bool(value)

    if ax is None:
        # The signature advertises ax=None; without this fallback the call
        # below would fail with AttributeError on None.
        import matplotlib.pyplot as plt
        ax = plt.gca()

    myargs = dict(kwargs)
    color = myargs.pop('color', None)
    label = myargs.pop('label', None)
    hatches = myargs.pop('hatches', None)

    if isinstance(data, list):
        stats = data
    else:
        stats = violin_stats(data, _kde_method)

    N = len(stats)

    if normvalues is not None:
        if np.isscalar(normvalues):
            normvalues = [normvalues] * N
        elif len(normvalues) != N:
            raise ValueError("Incorrect number of normvalues")
        myargs['widths'] = [normval * np.max(stat['vals'])
                            for normval, stat in zip(normvalues, stats)]

    widths = myargs.get('widths', 0.5)
    if np.isscalar(widths):
        widths = [widths] * N
    elif len(widths) != N:
        raise ValueError("Incorrect number of widths")
    myargs['widths'] = widths

    ournorms = [w / np.max(stat['vals']) for w, stat in zip(widths, stats)]

    # Same violins twice: ``vp`` carries the fill, ``vp_edge`` the outline.
    vp = ax.violin(stats, **myargs)
    vp_edge = ax.violin(stats, **myargs)

    have_color = _has_value(color)
    for pc, edge in zip(vp['bodies'], vp_edge['bodies']):
        if have_color:
            if len(color) == 4:
                # Fourth component is used as the fill alpha; the outline
                # stays fully opaque.
                pc.set_alpha(color[3])
                edge.set_alpha(1)
            pc.set_facecolor(color)
            pc.set_edgecolor('none')
            edge.set_edgecolor(color[:3])
            edge.set_facecolor('none')
        if hatches:
            pc.set_hatch(hatches)

    if label:
        if not have_color:
            color = vp['bodies'][0].get_facecolor()[0]
        vp['bodies'][0].set_label(label)
        handle = mpatches.Patch(facecolor=color, label=label,
                                hatch=hatches, edgecolor=color[:3])
    else:
        handle = None

    return vp, handle, ournorms
Example #7
0
def sinaplot(dataset, positions=None, vert=True, widths=0.5,
             showmeans=False, showextrema=True, showmedians=False, scaled=True,
             show_violin=False, points=100, bw_method=None, ax=None,
             scatter_kwargs=None, line_kwargs=None):
    """
    a cross between a violinplot and a scatterplot, from the R package sinaplot.
    Reimplemented by ripping off the violinplot function from matplotlib and
    tweaking a few bits.

    Every data point is drawn as a scatter marker whose offset from the
    group position is a random jitter bounded by the estimated density at
    that point.

    Parameters
    ----------
    dataset
        Indexed as ``dataset[:, i]`` for each group ``i`` and also passed to
        ``cbook.violin_stats``.
        NOTE(review): presumably a 2-D numpy array with one column per
        group - confirm with callers.
    positions : sequence, optional
        One position per group; defaults to ``1..N``.
    vert : bool
        Vertical layout when true, horizontal otherwise.
    widths : scalar or sequence
        Width per group (a scalar is used for all groups).
    showmeans, showextrema, showmedians : bool
        Toggle the corresponding statistic lines.
    scaled : bool
        When true all groups share one scale factor (the global maximum of
        the drawn jitter); otherwise each group uses its own maximum.
    show_violin : bool
        Also draw translucent density bodies behind the points.
    points : int
        Forwarded to ``cbook.violin_stats``.
    bw_method
        Second argument of ``GaussianKDE``.
    ax : optional
        Axes to draw on; defaults to ``plt.subplot()``.
    scatter_kwargs : dict, optional
        Keywords for the scatter call. ``color`` (default ``'b'``) may be a
        single string or a sequence with one entry per group. The dict is
        copied, not modified.
    line_kwargs : dict, optional
        Keywords for the statistic lines.

    Returns
    -------
    dict
        Artists keyed by ``'scatter'`` and, depending on the flags,
        ``'bodies'``, ``'cmeans'``, ``'cmaxes'``, ``'cmins'``, ``'cbars'``
        and ``'cmedians'``.

    Raises
    ------
    ValueError
        If ``positions``, ``widths`` or a non-string colour sequence does
        not have one entry per group.
    """

    def _kde_method(X, coords):
        # fallback gracefully if the vector contains only one value
        if np.all(X[0] == X):
            return (X[0] == coords).astype(float)
        kde = GaussianKDE(X, bw_method)
        return kde.evaluate(coords)

    vpstats = cbook.violin_stats(dataset, _kde_method, points=points)

    if ax is None:
        ax = plt.subplot()

    # Copy before popping so the caller's dict is left untouched, and fall
    # back to the default colour when the caller did not supply one.
    if scatter_kwargs is None:
        scatter_kwargs = dict(s=20, marker='o', alpha=0.9)
    else:
        scatter_kwargs = dict(scatter_kwargs)
    scatter_color = scatter_kwargs.pop('color', 'b')

    if line_kwargs is None:
        # Numeric linewidth; the width is a float in matplotlib's API.
        line_kwargs = dict(color='red', linewidth=1)

    # Collections to be returned
    artists = {}

    N = len(vpstats)
    datashape_message = ("List of violinplot statistics and `{0}` "
                         "values must have the same length")

    # Validate positions (np.asarray lets plain lists through reshape)
    if positions is None:
        positions = np.arange(1, N + 1)
    elif len(positions) != N:
        raise ValueError(datashape_message.format("positions"))
    positions = np.asarray(positions).reshape((N, 1))

    # Validate widths
    if np.isscalar(widths):
        widths = np.ones((N, 1)) * widths
    elif len(widths) != N:
        raise ValueError(datashape_message.format("widths"))
    widths = np.asarray(widths, dtype=float).reshape((N, 1))

    # Validate colors
    if isinstance(scatter_color, str):
        scatter_color = [scatter_color] * N
    elif len(scatter_color) != N:
        raise ValueError(datashape_message.format("scatter_color"))

    # Calculate ranges for statistics lines
    pmins = -0.25 * widths + positions
    pmaxes = 0.25 * widths + positions

    # Check whether we are rendering vertically or horizontally
    if vert:
        fill = ax.fill_betweenx
        perp_lines = ax.hlines
        par_lines = ax.vlines
    else:
        fill = ax.fill_between
        perp_lines = ax.vlines
        par_lines = ax.hlines

    # Calculate jittered scatter positions and collect the statistics
    jittered = []
    means = []
    mins = []
    maxes = []
    medians = []

    scatter_color_flattened = []
    for i, stats in enumerate(vpstats):
        x = dataset[:, i]
        # Interpolate the density at every data point; it bounds the jitter.
        bound = np.interp(x, stats['coords'], stats['vals'])
        jittered.append(np.random.uniform(-bound, bound))

        # statistics for the means/medians/extremities lines
        means.append(stats['mean'])
        mins.append(stats['min'])
        maxes.append(stats['max'])
        medians.append(stats['median'])
        scatter_color_flattened += [scatter_color[i]] * len(x)

    jittered = np.asarray(jittered)

    # get scale_factor (either scaled by largest value in all sinaplots or
    # per group). NOTE: the maximum is taken over the random jitter that was
    # drawn, not over the density itself.
    if scaled:
        scale_factor = np.ones((N, 1)) * jittered.max()
    else:
        scale_factor = jittered.max(1).reshape((N, 1))

    jittered = 0.5 * widths * jittered / scale_factor + positions

    # add background density plots
    if show_violin:
        bodies = []
        for stats, pos, width, sf, col in zip(vpstats, positions, widths,
                                              scale_factor, scatter_color):
            # The 0.5 factor reflects the fact that we plot from v-p to
            # v+p
            vals = 0.5 * width * np.array(stats['vals']) / sf
            bodies.append(fill(stats['coords'],
                               -vals + pos,
                               vals + pos,
                               facecolor=col,
                               alpha=0.2))
        artists['bodies'] = bodies

    # plot scatterplot on the requested axes (not on pyplot's current axes)
    if vert:
        artists['scatter'] = ax.scatter(jittered.flatten(),
                                        dataset.T.flatten(),
                                        color=scatter_color_flattened,
                                        **scatter_kwargs)
    else:
        artists['scatter'] = ax.scatter(dataset.T.flatten(),
                                        jittered.flatten(),
                                        color=scatter_color_flattened,
                                        **scatter_kwargs)

    # Render means
    if showmeans:
        artists['cmeans'] = perp_lines(means, pmins, pmaxes, **line_kwargs)
    # Render extrema
    if showextrema:
        artists['cmaxes'] = perp_lines(maxes, pmins, pmaxes, **line_kwargs)
        artists['cmins'] = perp_lines(mins, pmins, pmaxes, **line_kwargs)
        artists['cbars'] = par_lines(positions, mins, maxes, **line_kwargs)

    # Render medians
    if showmedians:
        artists['cmedians'] = perp_lines(medians, pmins, pmaxes, **line_kwargs)

    return artists
Example #8
0
def sinaplot(dataset,
             positions=None,
             vert=True,
             widths=0.5,
             showmeans=False,
             showextrema=True,
             showmedians=False,
             scaled=True,
             show_violin=False,
             points=100,
             bw_method=None,
             ax=None,
             scatter_kwargs=None,
             line_kwargs=None):
    """
    a cross between a violinplot and a scatterplot, from the R package sinaplot.
    Reimplemented by ripping off the violinplot function from matplotlib and
    tweaking a few bits.

    Each data point becomes a scatter marker, displaced from its group
    position by a random jitter whose magnitude is bounded by the estimated
    density at that point.

    Parameters
    ----------
    dataset
        Indexed as ``dataset[:, i]`` for each group ``i`` and also handed to
        ``cbook.violin_stats``.
        NOTE(review): presumably a 2-D numpy array with one column per
        group - confirm with callers.
    positions : sequence, optional
        One position per group; defaults to ``1..N``.
    vert : bool
        Vertical layout when true, horizontal otherwise.
    widths : scalar or sequence
        Width per group (a scalar applies to all groups).
    showmeans, showextrema, showmedians : bool
        Toggle the corresponding statistic lines.
    scaled : bool
        When true every group shares one scale factor (the global maximum
        of the drawn jitter); otherwise each group uses its own maximum.
    show_violin : bool
        Also draw translucent density bodies behind the points.
    points : int
        Forwarded to ``cbook.violin_stats``.
    bw_method
        Second argument of ``GaussianKDE``.
    ax : optional
        Axes to draw on; defaults to ``plt.subplot()``.
    scatter_kwargs : dict, optional
        Keywords for the scatter call. ``color`` (default ``'b'``) may be a
        single string or a sequence with one entry per group. The dict is
        copied, not modified.
    line_kwargs : dict, optional
        Keywords for the statistic lines.

    Returns
    -------
    dict
        Artists keyed by ``'scatter'`` and, depending on the flags,
        ``'bodies'``, ``'cmeans'``, ``'cmaxes'``, ``'cmins'``, ``'cbars'``
        and ``'cmedians'``.

    Raises
    ------
    ValueError
        If ``positions``, ``widths`` or a non-string colour sequence does
        not have one entry per group.
    """
    def _kde_method(X, coords):
        # fallback gracefully if the vector contains only one value
        if np.all(X[0] == X):
            return (X[0] == coords).astype(float)
        kde = GaussianKDE(X, bw_method)
        return kde.evaluate(coords)

    vpstats = cbook.violin_stats(dataset, _kde_method, points=points)

    if ax is None:
        ax = plt.subplot()

    # Take a private copy so popping ``color`` never alters the caller's
    # dict, and tolerate a dict that has no ``color`` entry.
    if scatter_kwargs is None:
        scatter_kwargs = dict(s=20, marker='o', alpha=0.9)
    else:
        scatter_kwargs = dict(scatter_kwargs)
    scatter_color = scatter_kwargs.pop('color', 'b')

    if line_kwargs is None:
        # Numeric linewidth; the width is a float in matplotlib's API.
        line_kwargs = dict(color='red', linewidth=1)

    # Collections to be returned
    artists = {}

    N = len(vpstats)
    datashape_message = ("List of violinplot statistics and `{0}` "
                         "values must have the same length")

    # Validate positions (np.asarray lets plain lists through reshape)
    if positions is None:
        positions = np.arange(1, N + 1)
    elif len(positions) != N:
        raise ValueError(datashape_message.format("positions"))
    positions = np.asarray(positions).reshape((N, 1))

    # Validate widths
    if np.isscalar(widths):
        widths = np.ones((N, 1)) * widths
    elif len(widths) != N:
        raise ValueError(datashape_message.format("widths"))
    widths = np.asarray(widths, dtype=float).reshape((N, 1))

    # Validate colors
    if isinstance(scatter_color, str):
        scatter_color = [scatter_color] * N
    elif len(scatter_color) != N:
        raise ValueError(datashape_message.format("scatter_color"))

    # Calculate ranges for statistics lines
    pmins = -0.25 * widths + positions
    pmaxes = 0.25 * widths + positions

    # Check whether we are rendering vertically or horizontally
    if vert:
        fill = ax.fill_betweenx
        perp_lines = ax.hlines
        par_lines = ax.vlines
    else:
        fill = ax.fill_between
        perp_lines = ax.vlines
        par_lines = ax.hlines

    # Calculate jittered scatter positions and collect the statistics
    jittered = []
    means = []
    mins = []
    maxes = []
    medians = []

    scatter_color_flattened = []
    for i, stats in enumerate(vpstats):
        x = dataset[:, i]
        # Density interpolated at each data point; it bounds the jitter.
        bound = np.interp(x, stats['coords'], stats['vals'])
        jittered.append(np.random.uniform(-bound, bound))

        # statistics for the means/medians/extremities lines
        means.append(stats['mean'])
        mins.append(stats['min'])
        maxes.append(stats['max'])
        medians.append(stats['median'])
        scatter_color_flattened += [scatter_color[i]] * len(x)

    jittered = np.asarray(jittered)

    # get scale_factor (either scaled by largest value in all sinaplots or
    # per group). NOTE: the maximum is taken over the random jitter that was
    # drawn, not over the density itself.
    if scaled:
        scale_factor = np.ones((N, 1)) * jittered.max()
    else:
        scale_factor = jittered.max(1).reshape((N, 1))

    jittered = 0.5 * widths * jittered / scale_factor + positions

    # add background density plots
    if show_violin:
        bodies = []
        for stats, pos, width, sf, col in zip(vpstats, positions, widths,
                                              scale_factor, scatter_color):
            # The 0.5 factor reflects the fact that we plot from v-p to
            # v+p
            vals = 0.5 * width * np.array(stats['vals']) / sf
            bodies.append(
                fill(stats['coords'],
                     -vals + pos,
                     vals + pos,
                     facecolor=col,
                     alpha=0.2))
        artists['bodies'] = bodies

    # plot scatterplot on the requested axes (not on pyplot's current axes)
    if vert:
        artists['scatter'] = ax.scatter(jittered.flatten(),
                                        dataset.T.flatten(),
                                        color=scatter_color_flattened,
                                        **scatter_kwargs)
    else:
        artists['scatter'] = ax.scatter(dataset.T.flatten(),
                                        jittered.flatten(),
                                        color=scatter_color_flattened,
                                        **scatter_kwargs)

    # Render means
    if showmeans:
        artists['cmeans'] = perp_lines(means, pmins, pmaxes, **line_kwargs)
    # Render extrema
    if showextrema:
        artists['cmaxes'] = perp_lines(maxes, pmins, pmaxes, **line_kwargs)
        artists['cmins'] = perp_lines(mins, pmins, pmaxes, **line_kwargs)
        artists['cbars'] = par_lines(positions, mins, maxes, **line_kwargs)

    # Render medians
    if showmedians:
        artists['cmedians'] = perp_lines(medians, pmins, pmaxes, **line_kwargs)

    return artists