Esempio n. 1
0
def makeFigure8(formulation, LHsamples, paramBounds, normSamples, thresholds):
    """Plot the water-level series of the 100-yr flood along four SOW trajectories.

    One subplot is drawn per scenario ('Mean', 'C1', 'Std', 'All3_Flood').
    In each subplot, five points along the trajectory are loaded with
    ``getSoln`` for the solution number ``formulation.bestFloodSoln.solnNo``;
    for every point the row of ``soln.HanoiLev`` with the 10th-largest
    row maximum is plotted and filled. The figure is written to
    ``Figure8.pdf``.

    :param formulation: object exposing ``bestFloodSoln.solnNo``
    :param LHsamples: not used in this function body
    :param paramBounds: not used in this function body
    :param normSamples: not used in this function body
    :param thresholds: not used in this function body
    :return: None
    """
    # scenarios for event plots
    scenarios = ['Mean', 'C1', 'Std', 'All3_Flood']
    titles = [
        'SOW Trajectory 1', 'SOW Trajectory 2', 'SOW Trajectory 3',
        'SOW Trajectory 4'
    ]
    returnPds = 100.0
    pctiles = 1 / returnPds
    # rank of the event to pick; presumably assumes 1000 simulated years
    # per solution (1000 / 100 -> 10th largest) -- TODO confirm
    IDs = pctiles * 1000
    colors = ['#e31a1c', '#fb9a99', '#f7f7f7', '#a6cee3', '#1f78b4']
    ylabel = 'Water Level (m)'
    solnNo = formulation.bestFloodSoln.solnNo
    ymax = 18.0
    days = range(0, 365)

    sns.set()
    fig = plt.figure()
    # plot 100-yr event for each pathway through the SOW space
    for k in range(4):  # 4 trajectories
        # load simulations from most robust solution across scenarios and find year of 100-yr event
        ax = fig.add_subplot(1, 4, k + 1)

        for i in range(5):  # 5 pts along trajectory
            soln = getSoln(solnNo, scenarios[k], i + 1)
            # assumes HanoiLev is a 2-D array (rows = years, 365 columns) -- TODO confirm
            maxFloods = np.max(soln.HanoiLev, 1)
            # sort row maxima in descending order and take the IDs-th largest
            year = np.argsort(maxFloods)[::-1][int(IDs) - 1]
            yvalues = soln.HanoiLev[year, :]

            # colours are consumed from the end of the list; later points
            # get a lower zorder so earlier fills stay on top
            ax.plot(days, yvalues, c=colors[-(1 + i)], linewidth=2)
            ax.fill_between(days,
                            yvalues,
                            color=colors[-(1 + i)],
                            zorder=5 - i)

        ax.plot([0, 364], [13.4, 13.4], c='k', linewidth=2)  # dike height

        ax.set_xlim([0, 364])
        ax.set_ylim([0, ymax])
        ax.set_xticks([45, 137, 229, 319])
        ax.set_xticklabels(['Jun', 'Sep', 'Dec', 'Mar'], fontsize=18)

        if k == 0:
            ax.set_ylabel(ylabel, fontsize=22)
            ax.tick_params(axis='y', labelsize=18)
        else:
            # Must be the boolean False: the string 'off' is truthy and
            # leaves the labels visible on Matplotlib >= 3.0.
            ax.tick_params(axis='y', labelleft=False)

        ax.set_title(titles[k], fontsize=22)

    fig.suptitle(
        'Water level time series during 100-yr flood with most robust solution for flooding',
        fontsize=22)
    fig.set_size_inches([26.4, 6.1])
    fig.savefig('Figure8.pdf')
    fig.clf()
    # clf() only empties the figure; close it so pyplot stops tracking it
    plt.close(fig)

    return None
Esempio n. 2
0
def plot_activity_matrix(df,
                         cmap,
                         normalized=False,
                         annotate=True,
                         out_path='',
                         title=''):
    """
    Draw the land-use transition (activity) matrix as a heat map and save it
    :param df: matrix of values handed to ``sns.heatmap`` (y-axis labelled FROM, x-axis TO)
    :param cmap: colour map forwarded to ``sns.heatmap``
    :param normalized: if True the values are multiplied by 100 and the colour bar
        keeps plain formatting; otherwise scientific notation is enabled
    :param annotate: whether each cell is annotated with its value
    :param out_path: destination passed to ``plt.savefig``
    :param title: axes title
    :return: None
    """
    logger.info('Plot activity matrix')
    sns.set(font_scale=0.8)

    cbar_format = tkr.ScalarFormatter(useMathText=True)
    if normalized:
        # normalized scale is from 0 - 100, does not need scientific scale
        scaled = df * 100.0
    else:
        cbar_format.set_scientific(True)
        cbar_format.set_powerlimits((-2, 2))
        scaled = df * 1.0

    # NOTE(review): both colour limits are rounded *up*; confirm that is
    # intended for the lower limit.
    heatmap_opts = {
        'cbar_kws': {'format': cbar_format},
        'cmap': cmap,
        'linewidths': .5,
        'linecolor': 'lightgray',
        'annot': annotate,
        'fmt': '.2g',
        'annot_kws': {'size': 6},
        'vmin': math.ceil(np.nanmin(scaled)),
        'vmax': math.ceil(np.nanmax(scaled)),  # maximum value on colorbar
    }
    ax = sns.heatmap(scaled, **heatmap_opts)
    ax.set_ylabel('FROM')
    ax.set_xlabel('TO')
    ax.set_title(title)

    # keep tick labels horizontal on both axes
    for tick_getter in (plt.xticks, plt.yticks):
        _, tick_labels = tick_getter()
        plt.setp(tick_labels, rotation=0)

    plt.savefig(out_path, dpi=constants.DPI)
    plt.close()

    # revert matplotlib params
    sns.reset_orig()
    set_matplotlib_params()
    get_colors(palette='tableau')
Esempio n. 3
0
def plot_activity_matrix(df, cmap, normalized=False, annotate=True, out_path='', title=''):
    """
    Render a heat map of the area of land moving between land-use types and write it to a file
    :param df: transition matrix given to ``sns.heatmap``
    :param cmap: colour map for the heat map
    :param normalized: True -> values are scaled by 100 and shown without scientific notation
    :param annotate: True -> write the value inside every cell
    :param out_path: file path handed to ``plt.savefig``
    :param title: title placed above the heat map
    :return: None
    """
    logger.info('Plot activity matrix')
    sns.set(font_scale=0.8)

    formatter = tkr.ScalarFormatter(useMathText=True)
    wants_scientific = not normalized
    if wants_scientific:
        # raw values need a scientific scale; the 0 - 100 normalized scale does not
        formatter.set_scientific(True)
        formatter.set_powerlimits((-2, 2))

    scale = 100.0 if normalized else 1.0
    df = df * scale

    colour_floor = math.ceil(np.nanmin(df))
    colour_ceiling = math.ceil(np.nanmax(df))  # maximum value on colorbar
    ax = sns.heatmap(
        df,
        vmin=colour_floor,
        vmax=colour_ceiling,
        cmap=cmap,
        cbar_kws={'format': formatter},
        annot=annotate,
        fmt='.2g',
        annot_kws={'size': 6},
        linewidths=.5,
        linecolor='lightgray',
    )
    ax.set_ylabel('FROM')
    ax.set_xlabel('TO')
    ax.set_title(title)

    # un-rotate the tick labels of the current axes
    plt.setp(plt.xticks()[1], rotation=0)
    plt.setp(plt.yticks()[1], rotation=0)

    plt.savefig(out_path, dpi=constants.DPI)
    plt.close()

    # revert matplotlib params
    sns.reset_orig()
    set_matplotlib_params()
    get_colors(palette='tableau')
Esempio n. 4
0
def plot_qq(clf, X, y, figsize=(7, 7)):
    """Draw a quantile-quantile plot of a fitted model's raw residuals.

    The residuals are compared against a ``scipy.stats.t`` distribution
    whose parameters are fitted to them, with a 45-degree reference line.
    The plot is shown via ``plt.show()`` and seaborn's settings are reset
    afterwards, even when plotting fails.

    Parameters
    ----------
    clf : sklearn.linear_model
        A scikit-learn linear model with a `predict()` method; must be an
        instance of one of `_utils.supported_linear_models`.
    X : numpy.ndarray
        Training data used to fit the model.
    y : numpy.ndarray
        Target training values, of shape = [n_samples].
    figsize : tuple
        Size of the figure, with format (x-axis, y-axis).
        Defaults to (7, 7).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.
    """
    # Only models we have tested are accepted
    assert isinstance(clf, _utils.supported_linear_models), (
        "Classifiers of type {0} not currently supported.".format(type(clf)))

    raw_residuals = stats.residuals(clf, X, y, r_type='raw')
    quantile_plot = sm.ProbPlot(raw_residuals, scipy.stats.t, fit=True)

    # Plot styling
    sns.set_style("darkgrid")
    sns.set(font_scale=1.2)

    try:
        # qqplot ignores figure size, so hand it a pre-sized axes
        fig, ax = plt.subplots(figsize=figsize)
        quantile_plot.qqplot(line='45', ax=ax)
        plt.title("Normal Quantile Plot")
        plt.xlabel("Theoretical Standardized Residuals")
        plt.ylabel("Actual Standardized Residuals")
        plt.show()
    finally:
        # Errors propagate unchanged; styling is restored either way
        sns.reset_orig()
    return fig
import pandas as pd
import matplotlib.pyplot as plt
import seaborn.apionly as sns
import time

import warnings

warnings.filterwarnings("ignore")

from scipy import optimize
import pymc3 as pm
import theano as thno
import theano.tensor as T

# configure some basic options
# seaborn theme: dark grid background with the "deep" colour palette
sns.set(style="darkgrid", palette="deep")
# pd.set_option('display.notebook_repr_html', True)
# default size for every figure created after this point
plt.rcParams["figure.figsize"] = 12, 8
# fixed seed so NumPy's global random generator is reproducible between runs
np.random.seed(0)


#### cut & pasted directly from the fetch_hogg2010test() function
## identical to the original dataset as hardcoded in the Hogg 2010 paper

dfhogg = pd.DataFrame(
    np.array(
        [
            [1, 201, 592, 61, 9, -0.84],
            [2, 244, 401, 25, 4, 0.31],
            [3, 47, 583, 38, 11, 0.64],
            [4, 287, 402, 15, 7, -0.27],
import pandas as pd
import numpy as np
from sklearn import linear_model
import seaborn.apionly as sns
import matplotlib.pyplot as plt

sns.set(style='whitegrid', context='notebook')

# Displaying original data
df = pd.read_csv("data/CHD.csv", header=0)
plt.figure()
plt.axis([0, 70, -0.2, 1.2])
plt.title("Original data")
plt.scatter(df['age'], df['chd'])  # scatter of the 'chd' column against 'age'
plt.show()

# Creating logistic regression model
# (C is the inverse regularisation strength, so 1e5 means a very weak penalty)
logistic = linear_model.LogisticRegression(C=1e5)
# reshape(-1, 1) builds the single-feature column matrix for any number of
# rows instead of hard-coding 100; the target is passed as a 1-D array,
# which is the shape scikit-learn expects for y.
logistic.fit(df['age'].values.reshape(-1, 1), df['chd'].values)

x_plot = np.linspace(10, 90, 100)
oneprob = []
zeroprob = []
predict = []
plt.figure(figsize=(10, 10))
for i in x_plot:
    temp_val = np.array(i).reshape(1, -1)
def contrastplot_test(data,
                      x,
                      y,
                      idx=None,
                      alpha=0.75,
                      axis_title_size=None,
                      barWidth=5,
                      contrastShareY=True,
                      contrastEffectSizeLineStyle='solid',
                      contrastEffectSizeLineColor='black',
                      contrastYlim=None,
                      contrastZeroLineStyle='solid',
                      contrastZeroLineColor='black',
                      effectSizeYLabel="Effect Size",
                      figsize=None,
                      floatContrast=True,
                      floatSwarmSpacer=0.2,
                      heightRatio=(1, 1),
                      idcol=None,
                      lineWidth=2,
                      legend=True,
                      legendFontSize=14,
                      legendFontProps={},
                      paired=False,
                      pal=None,
                      rawMarkerSize=8,
                      rawMarkerType='o',
                      reps=3000,
                      showGroupCount=True,
                      show95CI=False,
                      showAllYAxes=False,
                      showRawData=True,
                      smoothboot=False,
                      statfunction=None,
                      summaryBar=False,
                      summaryBarColor='grey',
                      summaryBarAlpha=0.25,
                      summaryColour='black',
                      summaryLine=True,
                      summaryLineStyle='solid',
                      summaryLineWidth=0.25,
                      summaryMarkerSize=10,
                      summaryMarkerType='o',
                      swarmShareY=True,
                      swarmYlim=None,
                      tickAngle=45,
                      tickAlignment='right',
                      violinOffset=0.375,
                      violinWidth=0.2,
                      violinColor='k',
                      xticksize=None,
                      yticksize=None,
                      **kwargs):
    '''Takes a pandas dataframe and produces a contrast plot:
    either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot.
    -----------------------------------------------------------------------
    Groups of `data[x]` are compared on `data[y]`. Every group after the
    first in each tuple of `idx` is contrasted with that tuple's first group
    via `bootstrap_contrast`. With exactly two groups and `floatContrast`
    true, the contrast is drawn on a twin axis beside the swarm plot;
    otherwise it is drawn on a second row below it. `floatContrast` is
    forced to False whenever a tuple has more than two groups.

    Selected parameters
    -------------------
    data : pandas DataFrame
    x, y : column names for the grouping variable and the measured value.
    idx : None, a sequence of group names, or a sequence of tuples/lists of
        group names (one sub-plot per inner sequence). When None, all
        groups are used (or only the first two, sorted, when `paired`).
    statfunction : callable applied per group; defaults to `np.mean`.
    swarmYlim, contrastYlim : optional (low, high) y-limits.
    pal : optional palette handed to seaborn; built from the seaborn
        default palette when None.
    **kwargs : forwarded to `sns.swarmplot` (`hue` is also inspected here).

    `effectSizeYLabel`, `heightRatio`, `idcol` and `swarmShareY` are
    accepted but not used in this function body.

    Returns
    -------
    (fig, contrastList) : the matplotlib Figure and a DataFrame with one
        column per contrast (named "<group> vs. <reference>").

    Raises
    ------
    TypeError : if `data` is not a DataFrame, or `idx` mixes strings and
        tuples.

    NOTE(review): the two-group paths read the swarm plot object, so they
    still fail when `showRawData` is False. `loc='top right'` is not a
    documented matplotlib legend location -- confirm the intended anchor.
    '''

    # Check that `data` is a pandas dataframe
    if 'DataFrame' not in str(type(data)):
        raise TypeError(
            "The object passed to the command is not a pandas DataFrame. "
            "Please convert it to a pandas DataFrame.")

    # Get and set levels of data[x]
    if idx is None:
        widthratio = [1]
        allgrps = np.sort(data[x].unique())
        if paired:
            # If `idx` is not specified, just take the FIRST TWO levels alphabetically.
            # Wrapped in an outer tuple so it has the same shape as every
            # other branch: one inner tuple per sub-plot.
            tuple_in = (tuple(allgrps[0:2]), )
        else:
            # No idx is given, so all groups are compared to the first one in the DataFrame column.
            tuple_in = (tuple(allgrps), )
            if len(allgrps) > 2:
                floatContrast = False

    else:
        if all(isinstance(element, str) for element in idx):
            # if idx is supplied but not a multiplot (ie single list or tuple)
            tuple_in = (idx, )
            widthratio = [1]
            if len(idx) > 2:
                floatContrast = False
        elif all(isinstance(element, (tuple, list)) for element in idx):
            # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
            tuple_in = idx
            if any(len(element) > 2 for element in tuple_in):
                # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False.
                floatContrast = False
            # Make sure the widthratio of the seperate multiplot corresponds to how
            # many groups there are in each one.
            widthratio = [len(element) for element in tuple_in]
        else:
            raise TypeError(
                "The object passed to `idx` consists of a mixture of single strings and tuples. "
                "Please make sure that `idx` is either a tuple of column names, or a tuple of tuples for plotting."
            )

    # initialise statfunction
    if statfunction is None:
        statfunction = np.mean

    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol = kwargs['hue']
            colGrps = data[colorCol].unique()
            nColors = len(colGrps)
        else:
            colorCol = x
            colGrps = data[x].unique()
            nColors = len([element for tupl in tuple_in for element in tupl])
        plotPal = dict(zip(colGrps, sns.color_palette(n_colors=nColors)))
    else:
        plotPal = pal

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryLine = False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine = False

    if swarmYlim is None:
        # get range of _selected groups_.
        # Iterate the normalised `tuple_in`, not `idx`: `idx` may be None
        # (the default) or a flat sequence of names.
        u = np.unique([grp for tupl in tuple_in for grp in tupl])
        tempdat = data[data[x].isin(u)]
        swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])])
    else:
        swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]])

    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])

    barWidth = barWidth / 1000.0  # Not sure why have to reduce the barwidth by this much!
    if showRawData is True:
        maxSwarmSpan = 0.25
    else:
        maxSwarmSpan = barWidth

    # Expand the ylim in both directions by 10% of its range.
    swarmrange = swarm_ylim[1] - swarm_ylim[0]
    pad = 0.1 * swarmrange
    swarm_ylim = np.array([swarm_ylim[0] - pad, swarm_ylim[1] + pad])

    # plot params
    if axis_title_size is None:
        axis_title_size = 25
    if yticksize is None:
        yticksize = 18
    if xticksize is None:
        xticksize = 18

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams = {'labelsize': axis_title_size}
    xtickParams = {'labelsize': xticksize}
    ytickParams = {'labelsize': yticksize}
    svgParams = {'fonttype': 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams)

    if figsize is None:
        if len(tuple_in) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (8, (8 / np.sqrt(2)))

    # Initialise figure, taking into account desired figsize.
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain = gridspec.GridSpec(
        1,
        len(tuple_in),
        # 1 row; columns based on number of tuples in tuple.
        width_ratios=widthratio,
        wspace=0)

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        # Explicit copy so the column assignments below act on our own
        # frame rather than on a slice of `data`.
        plotdat = data[data[x].isin(current_tuple)].copy()
        plotdat[x] = plotdat[x].astype("category")
        # Assign the result instead of using `inplace=True`, which newer
        # pandas releases no longer accept.
        plotdat[x] = plotdat[x].cat.set_categories(current_tuple,
                                                   ordered=True)
        # Drop all nans.
        plotdat = plotdat.dropna()

        # Calculate summaries.
        summaries = plotdat.groupby([x], sort=True)[y].apply(statfunction)

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            # Create subGridSpec with 2 rows and 1 column.
            subGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx], wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig,
                                      subGridSpec[1, 0],
                                      sharex=ax_raw,
                                      frame_on=False)
        # Calculate the boostrapped contrast
        bscontrast = list()
        for i in range(1, len(current_tuple)):
            # Note that you start from one. No need to do auto-contrast!
            tempbs = bootstrap_contrast(
                data=data,
                x=x,
                y=y,
                idx=[current_tuple[0], current_tuple[i]],
                statfunction=statfunction,
                smoothboot=smoothboot,
                reps=reps)
            bscontrast.append(tempbs)
            contrastList.append(tempbs)
            contrastListNames.append(current_tuple[i] + ' vs. ' +
                                     current_tuple[0])

        #### PLOT RAW DATA.
        if showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            ax_raw.set_ylim(swarm_ylim)
            sw = sns.swarmplot(data=plotdat,
                               x=x,
                               y=y,
                               order=current_tuple,
                               ax=ax_raw,
                               alpha=alpha,
                               palette=plotPal,
                               size=rawMarkerSize,
                               marker=rawMarkerType,
                               **kwargs)

        if summaryBar is True:
            bar_raw = sns.barplot(x=summaries.index.tolist(),
                                  y=summaries.values,
                                  facecolor=summaryBarColor,
                                  ax=ax_raw,
                                  alpha=summaryBarAlpha)

        if floatContrast:
            # Get horizontal offset values.
            maxXBefore = max(sw.collections[0].get_offsets().T[0])
            minXAfter = min(sw.collections[1].get_offsets().T[0])
            xposAfter = maxXBefore + floatSwarmSpacer
            xAfterShift = minXAfter - xposAfter
            # shift the swarmplots
            offsetSwarmX(sw.collections[1], -xAfterShift)

            ## get swarm with largest span, set as max width of each barplot.
            # The bars only exist when summaryBar was requested.
            if summaryBar is True:
                for i, bar in enumerate(bar_raw.patches):
                    x_width = bar.get_x()
                    width = bar.get_width()
                    centre = x_width + (width / 2.)
                    if i == 0:
                        bar.set_x(centre - maxSwarmSpan / 2.)
                    else:
                        bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                    bar.set_width(maxSwarmSpan)

            ## Set the ticks locations for ax_raw.
            ax_raw.xaxis.set_ticks((0, xposAfter))
            firstTick = ax_raw.xaxis.get_ticklabels()[0].get_text()
            secondTick = ax_raw.xaxis.get_ticklabels()[1].get_text()
            ax_raw.set_xticklabels(
                [firstTick, secondTick],
                rotation=tickAngle,
                horizontalalignment=tickAlignment)

        if summaryLine is True:
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i - summaryLineWidth,
                     i + summaryLineWidth),  # x-coordinates
                    (m, m),
                    color=summaryColour,
                    linestyle=summaryLineStyle)

        if show95CI is True:
            sns.barplot(data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple) == 2:
            # Plot the CIs on the contrast axes.
            plotbootstrap(sw.collections[1],
                          bslist=tempbs,
                          ax=ax_contrast,
                          violinWidth=violinWidth,
                          violinOffset=violinOffset,
                          markersize=summaryMarkerSize,
                          marker=summaryMarkerType,
                          offset=floatContrast,
                          color=violinColor,
                          linewidth=1)
            if floatContrast:
                # Set reference lines
                ## First get leftmost limit of left reference group
                xtemp, _ = np.array(sw.collections[0].get_offsets()).T
                leftxlim = xtemp.min()
                ## Then get leftmost limit of right test group
                xtemp, _ = np.array(sw.collections[1].get_offsets()).T
                rightxlim = xtemp.min()

                ## zero line
                ax_contrast.hlines(
                    0,  # y-coordinates
                    leftxlim,
                    3.5,  # x-coordinates, start and end.
                    linestyle=contrastZeroLineStyle,
                    linewidth=0.75,
                    color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(
                    tempbs['summary'],
                    rightxlim,
                    3.5,  # x-coordinates, start and end.
                    linestyle=contrastEffectSizeLineStyle,
                    linewidth=0.75,
                    color=contrastEffectSizeLineColor)

                ## Shift the right axis by the magnitude of the effect size.
                ## Matches the old positive and negative branches, and also
                ## covers an effect size of exactly zero, which used to
                ## leave the limits undefined.
                shift = abs(float(tempbs['summary']))
                raw_low, raw_high = ax_raw.get_ylim()
                ax_contrast.set_ylim(raw_low - shift, raw_high - shift)

                if gsIdx > 0:
                    ax_contrast.set_ylabel('')

                align_yaxis(ax_raw, tempbs['statistic_ref'], ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)

                # Set xlims so everything is properly visible!
                swarm_xbounds = ax_raw.get_xbound()
                ax_contrast.set_xbound(
                    swarm_xbounds[0] - (summaryLineWidth * 1.1),
                    swarm_xbounds[1] + (summaryLineWidth * 1.1))

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(bslist=bscontrast,
                                   ax=ax_contrast,
                                   violinWidth=violinWidth,
                                   violinOffset=violinOffset,
                                   markersize=summaryMarkerSize,
                                   marker=summaryMarkerType,
                                   linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx > 0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    ########
    axesCount = len(fig.get_axes())

    ## Loop thru SWARM axes for aesthetic touchups.
    ## (axes alternate swarm, contrast, swarm, contrast, ...)
    for i in range(0, axesCount, 2):
        axx = fig.axes[i]

        if i != axesCount - 2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if showAllYAxes is False:
            if i >= 2:
                axx.yaxis.set_visible(False)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(
                Line2D((axx.xaxis.get_view_interval()[0],
                        axx.xaxis.get_view_interval()[1]), (0, 0),
                       color='black',
                       linewidth=0.75))

        # I don't know why the swarm axes controls the contrast axes ticks....
        if showGroupCount:
            count = data.groupby(x).count()[y]
            newticks = list()
            for t in axx.xaxis.get_ticklabels():
                t_text = t.get_text()
                newticks.append(t_text + ' n=' + str(count[t_text]))
            axx.xaxis.set_ticklabels(newticks)

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i == axesCount - 2:  # the last (rightmost) swarm axes.
                axx.legend(loc='top right',
                           bbox_to_anchor=(1.1, 1.0),
                           fontsize=legendFontSize,
                           **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j, i in enumerate(range(1, axesCount, 2)):
        axx = fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright = axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                       xmin=xleft - 1,
                       xmax=xright + 1,
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)

            if showAllYAxes is False:
                if i >= 2:
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes.
                    # Not entirely sure why I have to do this.
                    drawback_y(axx)

            sns.despine(ax=axx,
                        top=True,
                        right=True,
                        left=False,
                        bottom=False,
                        trim=True)

            # Rotate tick labels.
            rotateTicks(axx, tickAngle, tickAlignment)

        else:
            # Re-draw the floating axis to the correct limits.
            # Row by label, column by position (replaces the removed
            # `.ix` indexer).
            lower = np.min(contrastList.loc['diffarray'].iloc[j])
            upper = np.max(contrastList.loc['diffarray'].iloc[j])
            meandiff = contrastList.loc['summary'].iloc[j]

            ## Make sure we have zero in the limits.
            if lower > 0:
                lower = 0.
            if upper < 0:
                upper = 0.

            ## Get the tick interval from the left y-axis.
            leftticks = fig.get_axes()[i - 1].get_yticks()
            tickstep = leftticks[1] - leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1 = axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = [b for b in newticks1 if lower <= b <= upper]
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2) < meandiff:
                # find out the max tick index in newticks1.
                ind = np.where(newticks1 == np.max(newticks2))[0][0]
                newticks2.append(newticks1[ind + 1])
            elif meandiff < np.min(newticks2):
                # find out the min tick index in newticks1.
                ind = np.where(newticks1 == np.min(newticks2))[0][0]
                newticks2.append(newticks1[ind - 1])
            newticks2 = np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine the axes.
            sns.despine(ax=axx,
                        trim=True,
                        bottom=False,
                        right=False,
                        left=True,
                        top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots.
    # The y-limits are decided by normalizeContrastY from `contrastYlim`.
    if (axesCount > 2 and contrastShareY is True and floatContrast is False):
        normalizeContrastY(fig,
                           contrast_ylim=contrastYlim,
                           show_all_yaxes=showAllYAxes)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)

    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're all done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList
def _contrast_axis_ticks(candidate_ticks, lower, upper, meandiff):
    '''Return the sorted subset of `candidate_ticks` lying in [lower, upper],
    extended by one neighbouring tick when `meandiff` falls outside that subset.
    '''
    candidate_ticks = np.asarray(candidate_ticks)
    ticks = [t for t in candidate_ticks if lower <= t <= upper]
    if np.max(ticks) < meandiff:
        # Add the next tick above the current maximum.
        ind = np.where(candidate_ticks == np.max(ticks))[0][0]
        ticks.append(candidate_ticks[ind + 1])
    elif meandiff < np.min(ticks):
        # Add the next tick below the current minimum.
        ind = np.where(candidate_ticks == np.min(ticks))[0][0]
        ticks.append(candidate_ticks[ind - 1])
    ticks = np.array(ticks)
    ticks.sort()
    return ticks


def pairedcontrast(data, x, y, idcol, reps = 3000,
statfunction = None, idx = None, figsize = None,
beforeAfterSpacer = 0.01, 
violinWidth = 0.005, 
floatOffset = 0.05, 
showRawData = False,
showAllYAxes = False,
floatContrast = True,
smoothboot = False,
floatViolinOffset = None, 
showConnections = True,
summaryBar = False,
contrastYlim = None,
swarmYlim = None,
barWidth = 0.005,
rawMarkerSize = 8,
rawMarkerType = 'o',
summaryMarkerSize = 10,
summaryMarkerType = 'o',
summaryBarColor = 'grey',
meansSummaryLineStyle = 'solid', 
contrastZeroLineStyle = 'solid', contrastEffectSizeLineStyle = 'solid',
contrastZeroLineColor = 'black', contrastEffectSizeLineColor = 'black',
pal = None,
legendLoc = 2, legendFontSize = 12, legendMarkerScale = 1,
axis_title_size = None,
yticksize = None,
xticksize = None,
tickAngle=45,
tickAlignment='right',
**kwargs):
    '''Draw a paired (before/after) contrast plot with a bootstrapped difference.

    For every pair of groups in `idx`, the raw observations are plotted, each
    subject's 'before' and 'after' points are optionally joined by a line, and
    the bootstrapped distribution of the paired difference (after - before) is
    drawn as a half-violin with its summary value and confidence interval.

    Keyword arguments:
        data: pandas DataFrame
            long-format data; rows containing NaN are dropped before plotting.

        x: string
            column name containing the group labels.

        y: string
            column name containing the values to plot.

        idcol: string
            column name identifying each paired subject; used as the pivot index.

        reps: int
            passed to `bootstrap` as `reps`.

        statfunction: callable
            summary statistic; defaults to np.mean.

        idx: tuple
            either one pair of group labels, or a tuple of such pairs (one
            subplot column per pair). Each pair is (before, after). If None,
            the first two sorted unique values of data[x] form a single pair.

        figsize: tuple
            defaults to (6, 6), or (12, 12/sqrt(2)) when there are more than
            two pairs.

        beforeAfterSpacer: float
            horizontal gap between the before and after points. Overridden
            to 1 when showRawData is True.

        violinWidth: float
            width of the bootstrap violin. Overridden to 0.5 when showRawData
            is True.

        floatOffset, meansSummaryLineStyle:
            accepted but not used by this function.

        showRawData: bool
            True draws a visible swarmplot; False draws a stripplot whose
            points are only used for positioning and are then hidden.

        showAllYAxes: bool
            when floatContrast is False, whether the contrast y-axes after the
            first one stay visible. Also selects zero spacing vs. tight layout.

        floatContrast: bool
            True puts the contrast on a twin y-axis to the right of the raw
            data; False puts it on a separate axes below the raw data.

        smoothboot: bool
            passed to `bootstrap` as `smoothboot`.

        floatViolinOffset: float
            x-offset of the violin from the rightmost 'after' point when
            floatContrast is True and showRawData is False. Defaults to
            beforeAfterSpacer/2.

        showConnections: bool
            draw a line joining each subject's before and after points.

        summaryBar: bool
            draw a bar at the mean of each group on the raw-data axes.

        contrastYlim, swarmYlim: 2-element sequences
            y-limits applied to the contrast and raw-data axes respectively.

        barWidth: float
            width of the summary bars when showRawData is False.

        rawMarkerSize, rawMarkerType:
            size and marker passed to the swarmplot (showRawData is True only).

        summaryMarkerSize, summaryMarkerType:
            size and marker of the summary-difference point.

        summaryBarColor:
            face colour of the summary bars.

        contrastZeroLineStyle, contrastZeroLineColor:
            style and colour of the zero reference line on the contrast axes.

        contrastEffectSizeLineStyle, contrastEffectSizeLineColor:
            style and colour of the effect-size reference line (floating
            contrast only).

        pal: dict
            colour palette; by default built from the unique values of the
            hue column (if `hue` is in kwargs) or of data[x].

        legendLoc, legendFontSize, legendMarkerScale:
            legend options, used only when `hue` is in kwargs.

        axis_title_size, yticksize, xticksize: numbers
            rc label sizes; default to 15, 12 and 12.

        tickAngle, tickAlignment:
            rotation and horizontal alignment of the x tick labels
            (floating contrast only).

        **kwargs:
            passed to the seaborn swarmplot/stripplot call. If neither
            `color` nor `hue` is given, `color` is set to 'k'.

    Returns:
        (fig, contrastList): the matplotlib Figure, and a pandas DataFrame
        with one column per pair, named "<after> v.s. <before>", holding the
        entries of the bootstrap result plus 'ttest_pval'.

    Raises:
        ValueError: if `idx`, or any pair inside it, does not have length 2.

    Note:
        matplotlib rc settings are modified while plotting; on return
        rcdefaults() and sns.set() are called.
    '''

    # Preliminaries.
    data = data.dropna()

    # plot params
    if axis_title_size is None:
        axis_title_size = 15
    if yticksize is None:
        yticksize = 12
    if xticksize is None:
        xticksize = 12

    rc('axes', labelsize = axis_title_size)
    rc('xtick', labelsize = xticksize)
    rc('ytick', labelsize = yticksize)

    ## Normalise `idx` into a sequence of (before, after) pairs.
    if idx is None:
        ## If `idx` is not specified, just take the FIRST TWO levels alphabetically,
        ## wrapped as a single pair so the plotting loop sees one (before, after) tuple.
        idx = (tuple(np.unique(data[x])[0:2]),)
    elif all(isinstance(element, str) for element in idx):
        # idx is supplied but not a multiplot (ie single list or tuple)
        if len(idx) != 2:
            raise ValueError("{} does not have length 2.".format(idx))
        idx = (tuple(idx),)
    elif all(isinstance(element, tuple) for element in idx):
        # idx is a list/tuple of tuples: we have a multiplot!
        wrong_length = [element for element in idx if len(element) != 2]
        if wrong_length:
            raise ValueError("{} does not have length 2.".format(wrong_length))

    if floatViolinOffset is None:
        floatViolinOffset = beforeAfterSpacer/2
    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])
    if swarmYlim is not None:
        swarmYlim = np.array([swarmYlim[0], swarmYlim[1]])

    ## Here we define the palette on all the levels of the 'x' column.
    ## Thus, if the same pandas dataframe is re-used across different plots,
    ## the color identity of each group will be maintained.
    ## Set palette based on total number of categories in data['x'] or data['hue_column']
    if 'hue' in kwargs:
        u = kwargs['hue']
    else:
        u = x
    if ('color' not in kwargs and 'hue' not in kwargs):
        kwargs['color'] = 'k'

    if pal is None:
        levels = data[u].unique()
        pal = dict(zip(levels, sns.color_palette(n_colors = len(levels))))

    # Initialise figure.
    if figsize is None:
        if len(idx) > 2:
            figsize = (12, (12/np.sqrt(2)))
        else:
            figsize = (6, 6)
    fig = plt.figure(figsize = figsize)

    # Initialise GridSpec: 1 row; one column per pair in `idx`.
    gsMain = gridspec.GridSpec(1, len(idx))
    # Set default statfunction
    if statfunction is None:
        statfunction = np.mean
    # Create list to collect all the contrast results generated.
    contrastList = list()
    contrastListNames = list()

    ## Pivot the data to get before and after values per subject.
    ## This does not depend on the pair being plotted, so compute it once.
    data_pivot = data.pivot_table(index = idcol, columns = x, values = y)

    for gsIdx, xlevs in enumerate(idx):
        # Start plotting!!
        if floatContrast is True:
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on = False)
            ax_contrast = ax_raw.twinx()
        else:
            gsSubGridSpec = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec = gsMain[gsIdx])
            ax_raw = plt.Subplot(fig, gsSubGridSpec[0, 0], frame_on = False)
            ax_contrast = plt.Subplot(fig, gsSubGridSpec[1, 0], sharex = ax_raw, frame_on = False)

        ## Plot raw data as swarmplot or stripplot.
        if showRawData is True:
            swarm_raw = sns.swarmplot(data = data,
                                     x = x, y = y,
                                     order = xlevs,
                                     ax = ax_raw,
                                     palette = pal,
                                     size = rawMarkerSize,
                                     marker = rawMarkerType,
                                     **kwargs)
        else:
            swarm_raw = sns.stripplot(data = data,
                                     x = x, y = y,
                                     order = xlevs,
                                     ax = ax_raw,
                                     palette = pal,
                                     **kwargs)
        swarm_raw.set_ylim(swarmYlim)

        ## Get some details about the raw data.
        maxXBefore = max(swarm_raw.collections[0].get_offsets().T[0])
        minXAfter = min(swarm_raw.collections[1].get_offsets().T[0])
        if showRawData is True:
            #beforeAfterSpacer = (getSwarmSpan(swarm_raw, 0) + getSwarmSpan(swarm_raw, 1))/2
            beforeAfterSpacer = 1
        xposAfter = maxXBefore + beforeAfterSpacer
        xAfterShift = minXAfter - xposAfter

        ## shift the after swarmpoints closer for aesthetic purposes.
        offsetSwarmX(swarm_raw.collections[1], -xAfterShift)

        ### create convenient signifiers for column names.
        befX = str(xlevs[0] + '_x')
        aftX = str(xlevs[1] + '_x')

        ## pandas DataFrame of 'before' group
        before_points = swarm_raw.collections[0]
        x1 = pd.DataFrame({befX : pd.Series(before_points.get_offsets().T[0]),
                       xlevs[0] : pd.Series(before_points.get_offsets().T[1]),
                       '_R_' : pd.Series(before_points.get_facecolors().T[0]),
                       '_G_' : pd.Series(before_points.get_facecolors().T[1]),
                       '_B_' : pd.Series(before_points.get_facecolors().T[2]),
                      })
        ## join the RGB columns into a tuple, then assign to a column.
        x1['_hue_'] = x1[['_R_', '_G_', '_B_']].apply(tuple, axis=1)
        ## Sorting both the plotted points and the pivot by value lets the
        ## subject ids from the pivot be attached to the plotted points.
        x1 = x1.sort_values(by = xlevs[0])
        x1.index = data_pivot.sort_values(by = xlevs[0]).index

        ## pandas DataFrame of 'after' group (read after the x-shift above).
        after_points = swarm_raw.collections[1]
        x2 = pd.DataFrame({aftX : pd.Series(after_points.get_offsets().T[0]),
            xlevs[1] : pd.Series(after_points.get_offsets().T[1])})
        x2 = x2.sort_values(by = xlevs[1])
        x2.index = data_pivot.sort_values(by = xlevs[1]).index

        ## Join x1 and x2, on both their indexes.
        plotPoints = x1.merge(x2, left_index = True, right_index = True, how='outer')

        ## Add the hue column if hue argument was passed.
        if 'hue' in kwargs:
            h = kwargs['hue']
            plotPoints[h] = data.pivot(index = idcol, columns = x, values = h)[xlevs[0]]
            swarm_raw.legend(loc = legendLoc,
                fontsize = legendFontSize,
                markerscale = legendMarkerScale)

        ## Plot the lines to join the 'before' points to their respective 'after' points.
        if showConnections is True:
            for i in plotPoints.index:
                ax_raw.plot([plotPoints.loc[i, befX],
                    plotPoints.loc[i, aftX]],
                    [plotPoints.loc[i, xlevs[0]],
                    plotPoints.loc[i, xlevs[1]]],
                    linestyle = 'solid',
                    color = plotPoints.loc[i, '_hue_'],
                    linewidth = 0.75,
                    alpha = 0.75
                    )

        ## Hide the raw swarmplot data if so desired.
        if showRawData is False:
            swarm_raw.collections[0].set_visible(False)
            swarm_raw.collections[1].set_visible(False)

        if showRawData is True:
            #maxSwarmSpan = max(np.array([getSwarmSpan(swarm_raw, 0), getSwarmSpan(swarm_raw, 1)]))/2
            maxSwarmSpan = 0.5
        else:
            maxSwarmSpan = barWidth

        ## Plot Summary Bar.
        if summaryBar is True:
            # Calculate means of the plotted column only, so that non-numeric
            # columns (ids, hue) cannot break the aggregation.
            means = data.groupby(x, sort = True)[y].mean()

            ## Draw summary bar.
            bar_raw = sns.barplot(x = means.index,
                        y = means.values,
                        order = xlevs,
                        ax = ax_raw,
                        ci = 0,
                        facecolor = summaryBarColor,
                        alpha = 0.25)
            ## Draw zero reference line.
            ax_raw.add_artist(Line2D(
                (ax_raw.xaxis.get_view_interval()[0],
                    ax_raw.xaxis.get_view_interval()[1]),
                (0,0),
                color='black', linewidth=0.75
                )
            )

            ## Re-centre each bar and set its width to the swarm span.
            for i, bar in enumerate(bar_raw.patches):
                centre = bar.get_x() + bar.get_width()/2.
                if i == 0:
                    bar.set_x(centre - maxSwarmSpan/2.)
                else:
                    # The 'after' bar follows the shifted 'after' points.
                    bar.set_x(centre - xAfterShift - maxSwarmSpan/2.)
                bar.set_width(maxSwarmSpan)

        # Get the extents of the plotted points.
        beforeRaw = pd.DataFrame( swarm_raw.collections[0].get_offsets() )
        afterRaw = pd.DataFrame( swarm_raw.collections[1].get_offsets() )
        before_leftx = min(beforeRaw[0])
        after_rightx = max(afterRaw[0])
        # Summary of the 'before' group: zero on the contrast axis is aligned to it.
        before_stat_summary = statfunction(beforeRaw[1])

        # Calculate the summary difference and CI.
        plotPoints['delta_y'] = plotPoints[xlevs[1]] - plotPoints[xlevs[0]]
        plotPoints['delta_x'] = [0] * np.shape(plotPoints)[0]

        bootsDelta = bootstrap(plotPoints['delta_y'],
            statfunction = statfunction,
            smoothboot = smoothboot,
            reps = reps)
        summDelta = bootsDelta['summary']
        lowDelta = bootsDelta['bca_ci_low']
        highDelta = bootsDelta['bca_ci_high']

        # set new xpos for delta violin.
        if floatContrast is True:
            if showRawData is False:
                xposPlusViolin = deltaSwarmX = after_rightx + floatViolinOffset
            else:
                xposPlusViolin = deltaSwarmX = after_rightx + maxSwarmSpan
        else:
            xposPlusViolin = xposAfter
        if showRawData is True:
            # If showRawData is True, set violinwidth to the swarm span.
            violinWidth = maxSwarmSpan

        xmaxPlot = xposPlusViolin + violinWidth

        # Plot the summary measure.
        ax_contrast.plot(xposPlusViolin, summDelta,
            marker = summaryMarkerType,
            markerfacecolor = 'k',
            markersize = summaryMarkerSize,
            alpha = 0.75
            )

        # Plot the CI.
        ax_contrast.plot([xposPlusViolin, xposPlusViolin],
            [lowDelta, highDelta],
            color = 'k',
            alpha = 0.75,
            linestyle = 'solid'
            )

        # Plot the violin-plot.
        v = ax_contrast.violinplot(bootsDelta['stat_array'], [xposPlusViolin],
                                 widths = violinWidth,
                                 showextrema = False,
                                 showmeans = False)
        halfviolin(v, half = 'right', color = 'k')

        # Remove left axes x-axis title.
        ax_raw.set_xlabel("")
        # Remove floating axes y-axis title.
        ax_contrast.set_ylabel("")

        # Set proper x-limits
        ax_raw.set_xlim(before_leftx - beforeAfterSpacer/2, xmaxPlot)
        ax_raw.get_xaxis().set_view_interval(before_leftx - beforeAfterSpacer/2,
            after_rightx + beforeAfterSpacer/2)
        ax_contrast.set_xlim(ax_raw.get_xlim())

        if floatContrast is True:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            # Make sure they have the same y-limits.
            ax_contrast.set_ylim(ax_raw.get_ylim())

            # Drawing in the x-axis for ax_raw.
            ## Set the tick labels!
            ax_raw.set_xticklabels(xlevs, rotation = tickAngle, horizontalalignment = tickAlignment)

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, statfunction(plotPoints[xlevs[0]]),
                           ax_contrast, 0)

            # Add label to floating axes. But on ax_raw!
            ax_raw.text(x = deltaSwarmX,
                          y = ax_raw.get_yaxis().get_view_interval()[0],
                          horizontalalignment = 'left',
                          s = 'Difference',
                          fontsize = 15)

            # Set reference lines
            ## zero line
            ax_contrast.hlines(0,                                           # y-coordinate
                            ax_contrast.xaxis.get_majorticklocs()[0],       # x-coordinates, start and end.
                            ax_raw.xaxis.get_view_interval()[1],
                            linestyle = contrastZeroLineStyle,
                            linewidth = 0.75,
                            color = contrastZeroLineColor)

            ## effect size line
            ax_contrast.hlines(summDelta,
                            ax_contrast.xaxis.get_majorticklocs()[1],
                            ax_raw.xaxis.get_view_interval()[1],
                            linestyle = contrastEffectSizeLineStyle,
                            linewidth = 0.75,
                            color = contrastEffectSizeLineColor)

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, before_stat_summary, ax_contrast, 0.)
        else:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            fig.add_subplot(ax_raw)
            fig.add_subplot(ax_contrast)
        ax_contrast.set_ylim(contrastYlim)
        # Calculate p-values.
        # 1-sample t-test to see if the mean of the difference is different from 0.
        ttestresult = ttest_1samp(plotPoints['delta_y'], popmean = 0)[1]
        bootsDelta['ttest_pval'] = ttestresult
        contrastList.append(bootsDelta)
        contrastListNames.append( str(xlevs[1])+' v.s. '+str(xlevs[0]) )

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Axes alternate raw (even index) / contrast (odd index) in creation order.
    all_axes = fig.get_axes()

    # Now we iterate thru the contrast axes to normalize all the ylims.
    for j, i in enumerate(range(1, len(all_axes), 2)):
        axx = all_axes[i]
        ## Get max and min of the dataset (column selected by position).
        contrast_result = contrastList.iloc[:, j]
        lower = np.min(contrast_result['stat_array'])
        upper = np.max(contrast_result['stat_array'])
        meandiff = contrast_result['summary']

        ## Make sure we have zero in the limits.
        if lower > 0:
            lower = 0.
        if upper < 0:
            upper = 0.

        ## Get tick distance on raw axes.
        ## This will be the tick distance for the contrast axes.
        rawAxesTicks = all_axes[i-1].yaxis.get_majorticklocs()
        rawAxesTickDist = rawAxesTicks[1] - rawAxesTicks[0]

        ## First re-draw of axis with new tick interval
        axx.yaxis.set_major_locator(MultipleLocator(rawAxesTickDist))
        newticks1 = axx.get_yticks()

        if floatContrast is False:
            if (showAllYAxes is False and i >= 2):
                # Every contrast axes after the first one loses its y-axis.
                axx.get_yaxis().set_visible(showAllYAxes)
            else:
                ## Obtain major ticks that comfortably encompass lower and upper.
                newticks2 = _contrast_axis_ticks(newticks1, lower, upper, meandiff)
                axx.yaxis.set_major_locator(FixedLocator(locs = newticks2))

                ## Draw zero reference line.
                axx.hlines(y = 0,
                    xmin = axx.get_xaxis().get_view_interval()[0],
                    xmax = axx.get_xaxis().get_view_interval()[1],
                    linestyle = contrastZeroLineStyle,
                    linewidth = 0.75,
                    color = contrastZeroLineColor)

                sns.despine(ax = axx, trim = True,
                    bottom = False, right = True,
                    left = False, top = True)

                ## Draw back the lines for the relevant y-axes.
                drawback_y(axx)

                ## Draw back the lines for the relevant x-axes.
                drawback_x(axx)

        elif floatContrast is True:
            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = _contrast_axis_ticks(newticks1, lower, upper, meandiff)

            ## Re-draw the axis.
            axx.yaxis.set_major_locator(FixedLocator(locs = newticks2))

            ## Despine and trim the axes.
            sns.despine(ax = axx, trim = True,
                bottom = False, right = False,
                left = True, top = True)

    for i in range(0, len(all_axes), 2):
        # Loop through the raw data swarmplots and despine them appropriately.
        raw_axes = all_axes[i]
        if floatContrast is True:
            sns.despine(ax = raw_axes, trim = True, right = True)

        else:
            sns.despine(ax = raw_axes, trim = True, bottom = True, right = True)
            raw_axes.get_xaxis().set_visible(False)

        # Draw back the lines for the relevant y-axes.
        ymin = raw_axes.get_yaxis().get_majorticklocs()[0]
        ymax = raw_axes.get_yaxis().get_majorticklocs()[-1]
        x_left, _ = raw_axes.get_xaxis().get_view_interval()
        raw_axes.add_artist(Line2D((x_left, x_left), (ymin, ymax), color='black', linewidth=1.5))

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace = 0)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're done.
    rcdefaults() # restore matplotlib defaults.
    sns.set() # restore seaborn defaults.
    return fig, contrastList
Esempio n. 9
0
def contrastplot(
    data, x=None, y=None, idx=None, idcol=None,

    alpha=0.75, 
    axis_title_size=None,

    ci=95,
    contrastShareY=True,
    contrastEffectSizeLineStyle='solid',
    contrastEffectSizeLineColor='black',

    contrastYlim=None,
    contrastZeroLineStyle='solid', 
    contrastZeroLineColor='black', 
    connectPairs=True,

    effectSizeYLabel="Effect Size", 

    figsize=None, 
    floatContrast=True,
    floatSwarmSpacer=0.2,

    heightRatio=(1, 1),

    lineWidth=2,
    legend=True,
    legendFontSize=14,
    legendFontProps={},

    paired=False,
    pairedDeltaLineAlpha=0.3,
    pairedDeltaLineWidth=1.2,
    pal=None, 

    rawMarkerSize=8,
    rawMarkerType='o',
    reps=3000,
    
    showGroupCount=True,
    showCI=False, 
    showAllYAxes=False,
    showRawData=True,
    smoothboot=False, 
    statfunction=None, 

    summaryBar=False, 
    summaryBarColor='grey',
    summaryBarAlpha=0.25,

    summaryColour='black', 
    summaryLine=True, 
    summaryLineStyle='solid', 
    summaryLineWidth=0.25, 

    summaryMarkerSize=10, 
    summaryMarkerType='o',

    swarmShareY=True, 
    swarmYlim=None, 

    tickAngle=45,
    tickAlignment='right',

    violinOffset=0.375,
    violinWidth=0.2, 
    violinColor='k',

    xticksize=None,
    yticksize=None,

    **kwargs):

    '''Takes a pandas DataFrame and produces a contrast plot:
    either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot.
    Paired and unpaired options available.

    Keyword arguments:
        data: pandas DataFrame
            
        x: string
            column name containing categories to be plotted on the x-axis.

        y: string
            column name containing values to be plotted on the y-axis.

        idx: tuple
            flxible declaration of groupwise comparisons.

        idcol: string
            for paired plots.

        alpha: float
            alpha (transparency) of raw swarmed data points.
            
        axis_title_size=None
        ci=95
        contrastShareY=True
        contrastEffectSizeLineStyle='solid'
        contrastEffectSizeLineColor='black'
        contrastYlim=None
        contrastZeroLineStyle='solid'
        contrastZeroLineColor='black'
        effectSizeYLabel="Effect Size"
        figsize=None
        floatContrast=True
        floatSwarmSpacer=0.2
        heightRatio=(1,1)
        lineWidth=2
        legend=True
        legendFontSize=14
        legendFontProps={}
        paired=False
        pairedDeltaLineAlpha=0.3
        pairedDeltaLineWidth=1.2
        pal=None
        rawMarkerSize=8
        rawMarkerType='o'
        reps=3000
        showGroupCount=True
        showCI=False
        showAllYAxes=False
        showRawData=True
        smoothboot=False
        statfunction=None
        summaryBar=False
        summaryBarColor='grey'
        summaryBarAlpha=0.25
        summaryColour='black'
        summaryLine=True
        summaryLineStyle='solid'
        summaryLineWidth=0.25
        summaryMarkerSize=10
        summaryMarkerType='o'
        swarmShareY=True
        swarmYlim=None
        tickAngle=45
        tickAlignment='right'
        violinOffset=0.375
        violinWidth=0.2
        violinColor='k'
        xticksize=None
        yticksize=None

    Returns:
        An matplotlib Figure.
        Organization of figure Axes.
    '''

    # Check that `data` is a pandas dataframe
    if 'DataFrame' not in str(type(data)):
        raise TypeError("The object passed to the command is not not a pandas DataFrame.\
         Please convert it to a pandas DataFrame.")

    # make sure that at least x, y, and idx are specified.
    if x is None and y is None and idx is None:
        raise ValueError('You need to specify `x` and `y`, or `idx`. Neither has been specifed.')

    if x is None:
        # if x is not specified, assume this is a 'wide' dataset, with each idx being the name of a column.
        datatype='wide'
        # Check that the idx are legit columns.
        all_idx=np.unique([element for tupl in idx for element in tupl])
        # # melt the data.
        # data=pd.melt(data,value_vars=all_idx)
        # x='variable'
        # y='value'
    else:
        # if x is specified, assume this is a 'long' dataset with each row corresponding to one datapoint.
        datatype='long'
        # make sure y is not none.
        if y is None:
            raise ValueError("`paired` is false, but no y-column given.")
        # Calculate Ns.
        counts=data.groupby(x)[y].count()

    # Get and set levels of data[x]
    if paired is True:
        violinWidth=0.1
        # # Calculate Ns--which should be simply the number of rows in data.
        # counts=len(data)
        # is idcol supplied?
        if idcol is None and datatype=='long':
            raise ValueError('`idcol` has not been supplied but a paired plot is desired; please specify the `idcol`.')
        if idx is not None:
            # check if multi-plot or not
            if all(isinstance(element, str) for element in idx):
                # check that every idx is a column name.
                idx_not_in_cols=[n
                for n in idx
                if n not in data[x].unique()]
                if len(idx_not_in_cols)!=0:
                    raise ValueError(str(idx_not_in_cols)+" cannot be found in the columns of `data`.")
                # data_wide_cols=[n for n in idx if n in data.columns]
                # if idx is supplied but not a multiplot (ie single list or tuple)
                if len(idx) != 2:
                    raise ValueError(idx+" does not have length 2.")
                else:
                    tuple_in=(tuple(idx, ),)
                widthratio=[1]
            elif all(isinstance(element, tuple) for element in idx):
                # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
                idx_not_in_cols=[n
                for tup in idx
                for n in tup
                if n not in data[x].unique()]
                if len(idx_not_in_cols)!=0:
                    raise ValueError(str(idx_not_in_cols)+" cannot be found in the column "+x)
                # data_wide_cols=[n for tup in idx for n in tup if n in data.columns]
                if ( any(len(element) != 2 for element in idx) ):
                    # If any of the tuples does not contain exactly 2 elements.
                    raise ValueError(element+" does not have length 2.")
                # Make sure the widthratio of the seperate multiplot corresponds to how 
                # many groups there are in each one.
                tuple_in=idx
                widthratio=[]
                for i in tuple_in:
                    widthratio.append(len(i))
        elif idx is None:
            raise ValueError('Please specify idx.')
        showRawData=False # Just show lines, do not show data.
        showCI=False # wait till I figure out how to plot this for sns.barplot.
        if datatype=='long':
            if idx is None:
                ## If `idx` is not specified, just take the FIRST TWO levels alphabetically.
                tuple_in=tuple(np.sort(np.unique(data[x]))[0:2],)
            # pivot the dataframe if it is long!
            data_pivot=data.pivot_table(index = idcol, columns = x, values = y)

    elif paired is False:
        if idx is None:
            widthratio=[1]
            tuple_in=( tuple(data[x].unique()) ,)
            if len(tuple_in[0])>2:
                floatContrast=False
        else:
            if all(isinstance(element, str) for element in idx):
                # if idx is supplied but not a multiplot (ie single list or tuple)
                # check all every idx specified can be found in data[x]
                idx_not_in_x=[n for n in idx 
                if n not in data[x].unique()]
                if len(idx_not_in_x)!=0:
                    raise ValueError(str(idx_not_in_x)+" cannot be found in the column "+x)
                tuple_in=(idx, )
                widthratio=[1]
                if len(idx)>2:
                    floatContrast=False
            elif all(isinstance(element, tuple) for element in idx):
                # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
                idx_not_in_x=[n
                for tup in idx
                for n in tup
                if n not in data[x].unique()]
                if len(idx_not_in_x)!=0:
                    raise ValueError(str(idx_not_in_x)+" cannot be found in the column "+x)
                tuple_in=idx

                if ( any(len(element)>2 for element in tuple_in) ):
                    # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False.
                    floatContrast=False
                # Make sure the widthratio of the seperate multiplot corresponds to how 
                # many groups there are in each one.
                widthratio=[]
                for i in tuple_in:
                    widthratio.append(len(i))
            else:
                raise TypeError("The object passed to `idx` consists of a mixture of single strings and tuples. \
                    Please make sure that `idx` is either a tuple of column names, or a tuple of tuples, for plotting.")

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar=True
        summaryLine=False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine=False
    # initialise statfunction
    if statfunction == None:
        statfunction=np.mean
    # Create list to collect all the contrast DataFrames generated.
    contrastList=list()
    contrastListNames=list()

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol=kwargs['hue']
            if colorCol not in data.columns:
                raise ValueError(colorCol+' is not a column name.')
            colGrps=data[colorCol].unique()#.tolist()
            plotPal=dict( zip( colGrps, sns.color_palette(n_colors=len(colGrps)) ) )
        else:
            if datatype=='long':
                colGrps=data[x].unique()#.tolist()
                plotPal=dict( zip( colGrps, sns.color_palette(n_colors=len(colGrps)) ) )
            if datatype=='wide':
                plotPal=np.repeat('k',len(data))
    else:
        if datatype=='long':
            plotPal=pal
        if datatype=='wide':
            plotPal=list(map(lambda x:pal[x], data[hue]))

    if swarmYlim is None:
        # get range of _selected groups_.
        # u = list()
        # for t in tuple_in:
        #     for i in np.unique(t):
        #         u.append(i)
        # u = np.unique(u)
        u=np.unique([element for tupl in tuple_in for element in tupl])
        if datatype=='long':
            tempdat=data[data[x].isin(u)]
            swarm_ylim=np.array([np.min(tempdat[y]), np.max(tempdat[y])])
        if datatype=='wide':
            allMin=list()
            allMax=list()
            for col in u:
                allMin.append(np.min(data[col]))
                allMax.append(np.max(data[col]))
            swarm_ylim=np.array( [np.min(allMin),np.max(allMax)] )
        swarm_ylim=np.round(swarm_ylim)
    else:
        swarm_ylim=np.array([swarmYlim[0],swarmYlim[1]])

    if summaryBar is True:
        lims=swarm_ylim
        # check that 0 lies within the desired limits.
        # if not, extend (upper or lower) limit to zero.
        if 0 not in range( int(round(lims[0])),int(round(lims[1])) ): # turn swarm_ylim to integer range.
            # check if all negative:.
            if lims[0]<0. and lims[1]<0.:
                swarm_ylim=np.array([np.min(lims),0.])
            # check if all positive.
            elif lims[0]>0. and lims[1]>0.:
                swarm_ylim=np.array([0.,np.max(lims)])

    if contrastYlim is not None:
        contrastYlim=np.array([contrastYlim[0],contrastYlim[1]])

    # plot params
    if axis_title_size is None:
        axis_title_size=27
    if yticksize is None:
        yticksize=22
    if xticksize is None:
        xticksize=22

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams={'labelsize' : axis_title_size}
    xtickParams={'labelsize' : xticksize}
    ytickParams={'labelsize' : yticksize}
    svgParams={'fonttype' : 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams) 

    if figsize is None:
        if len(tuple_in)>2:
            figsize=(12,(12/np.sqrt(2)))
        else:
            figsize=(8,(8/np.sqrt(2)))
    
    # calculate CI.
    if ci<0 or ci>100:
        raise ValueError('ci should be between 0 and 100.')
    alpha_level=(100.-ci)/100.

    # Initialise figure, taking into account desired figsize.
    fig=plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain=gridspec.GridSpec( 
        1, np.shape(tuple_in)[0], 
         # 1 row; columns based on number of tuples in tuple.
         width_ratios=widthratio,
         wspace=0 )

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        if datatype=='long':
            plotdat=data[data[x].isin(current_tuple)]
            plotdat[x]=plotdat[x].astype("category")
            plotdat[x].cat.set_categories(
                current_tuple,
                ordered=True,
                inplace=True)
            plotdat.sort_values(by=[x])
            # # Drop all nans. 
            # plotdat.dropna(inplace=True)
            summaries=plotdat.groupby(x)[y].apply(statfunction)
        if datatype=='wide':
            plotdat=data[list(current_tuple)]
            summaries=statfunction(plotdat)
            plotdat=pd.melt(plotdat) ##### NOW I HAVE MELTED THE WIDE DATA.
            
        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot.
            ax_raw=fig.add_subplot(gsMain[gsIdx],
                frame_on=False)
            ax_contrast=ax_raw.twinx()
        else:
        # Create subGridSpec with 2 rows and 1 column.
            subGridSpec=gridspec.GridSpecFromSubplotSpec(2, 1,
                subplot_spec=gsMain[gsIdx],
                wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw=plt.Subplot(fig,
                subGridSpec[0, 0],
                frame_on=False)
            ax_contrast=plt.Subplot(fig,
                subGridSpec[1, 0],
                sharex=ax_raw,
                frame_on=False)
        # Calculate the boostrapped contrast
        bscontrast=list()
        if paired is False:
            tempplotdat=plotdat[[x,y]] # only select the columns used for x and y plotting.
            for i in range (1, len(current_tuple)):
                # Note that you start from one. No need to do auto-contrast!
                # if datatype=='long':aas
                    tempbs=bootstrap_contrast(
                        data=tempplotdat.dropna(), 
                        x=x,
                        y=y,
                        idx=[current_tuple[0], current_tuple[i]],
                        statfunction=statfunction,
                        smoothboot=smoothboot,
                        alpha_level=alpha_level,
                        reps=reps)
                    bscontrast.append(tempbs)
                    contrastList.append(tempbs)
                    contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0])

        #### PLOT RAW DATA.
        ax_raw.set_ylim(swarm_ylim)
        # ax_raw.yaxis.set_major_locator(MaxNLocator(n_bins='auto'))
        # ax_raw.yaxis.set_major_locator(LinearLocator())
        if paired is False and showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            sw=sns.swarmplot(
                data=plotdat, 
                x=x, y=y, 
                order=current_tuple, 
                ax=ax_raw, 
                alpha=alpha, 
                palette=plotPal,
                size=rawMarkerSize,
                marker=rawMarkerType,
                **kwargs)

            if floatContrast:
                # Get horizontal offset values.
                maxXBefore=max(sw.collections[0].get_offsets().T[0])
                minXAfter=min(sw.collections[1].get_offsets().T[0])
                xposAfter=maxXBefore+floatSwarmSpacer
                xAfterShift=minXAfter-xposAfter
                # shift the (second) swarmplot
                offsetSwarmX(sw.collections[1], -xAfterShift)
                # shift the tick.
                ax_raw.set_xticks([0.,1-xAfterShift])

        elif paired is True:
            if showRawData is True:
                sw=sns.swarmplot(data=plotdat, 
                    x=x, y=y, 
                    order=current_tuple, 
                    ax=ax_raw, 
                    alpha=alpha, 
                    palette=plotPal,
                    size=rawMarkerSize,
                    marker=rawMarkerType,
                **kwargs)
            if connectPairs is True:
                # Produce paired plot with lines.
                before=plotdat[plotdat[x]==current_tuple[0]][y].tolist()
                after=plotdat[plotdat[x]==current_tuple[1]][y].tolist()
                linedf=pd.DataFrame(
                    {'before':before,
                    'after':after}
                    )
                # to get color, need to loop thru each line and plot individually.
                for ii in range(0,len(linedf)):
                    ax_raw.plot( [0,0.25], [ linedf.loc[ii,'before'],
                                            linedf.loc[ii,'after'] ],
                                linestyle='solid',
                                linewidth=pairedDeltaLineWidth,
                                color=plotPal[current_tuple[0]],
                                alpha=pairedDeltaLineAlpha,
                               )
                ax_raw.set_xlim(-0.25,0.5)
                ax_raw.set_xticks([0,0.25])
                ax_raw.set_xticklabels([current_tuple[0],current_tuple[1]])

        # if swarmYlim is None:
        #     # if swarmYlim was not specified, tweak the y-axis 
        #     # to show all the data without losing ticks and range.
        #     ## Get all yticks.
        #     axxYTicks=ax_raw.yaxis.get_majorticklocs()
        #     ## Get ytick interval.
        #     YTickInterval=axxYTicks[1]-axxYTicks[0]
        #     ## Get current ylim
        #     currentYlim=ax_raw.get_ylim()
        #     ## Extend ylim by adding a fifth of the tick interval as spacing at both ends.
        #     ax_raw.set_ylim(
        #         currentYlim[0]-(YTickInterval/5),
        #         currentYlim[1]+(YTickInterval/5)
        #         )
        #     ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto'))
        # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto'))
        # ax_raw.yaxis.set_major_locator(LinearLocator())

        if summaryBar is True:
            if paired is False:
                bar_raw=sns.barplot(
                    x=summaries.index.tolist(),
                    y=summaries.values,
                    facecolor=summaryBarColor,
                    ax=ax_raw,
                    alpha=summaryBarAlpha)
                if floatContrast is True:
                    maxSwarmSpan=2/10.
                    xlocs=list()
                    for i, bar in enumerate(bar_raw.patches):
                        x_width=bar.get_x()
                        width=bar.get_width()
                        centre=x_width + (width/2.)
                        if i == 0:
                            bar.set_x(centre-maxSwarmSpan/2.)
                            xlocs.append(centre)
                        else:
                            bar.set_x(centre-xAfterShift-maxSwarmSpan/2.)
                            xlocs.append(centre-xAfterShift)
                        bar.set_width(maxSwarmSpan)
                    ax_raw.set_xticks(xlocs) # make sure xticklocs match the barplot.
                elif floatContrast is False:
                    maxSwarmSpan=4/10.
                    xpos=ax_raw.xaxis.get_majorticklocs()
                    for i, bar in enumerate(bar_raw.patches):
                        bar.set_x(xpos[i]-maxSwarmSpan/2.)
                        bar.set_width(maxSwarmSpan)
            else:
                # if paired is true
                ax_raw.bar([0,0.25], 
                    [ statfunction(plotdat[current_tuple[0]]),
                    statfunction(plotdat[current_tuple[1]]) ],
                    color=summaryBarColor,
                    alpha=0.5,
                    width=0.05)
                ## Draw zero reference line.
                ax_raw.add_artist(Line2D(
                    (ax_raw.xaxis.get_view_interval()[0],
                     ax_raw.xaxis.get_view_interval()[1]),
                    (0,0),
                    color='k', linewidth=1.25)
                                 )

        if summaryLine is True:
            if paired is True:
                xdelta=0
            else:
                xdelta=summaryLineWidth
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i-xdelta, 
                    i+xdelta), # x-coordinates
                    (m, m),
                    color=summaryColour, 
                    linestyle=summaryLineStyle)

        if showCI is True:
                sns.barplot(
                    data=plotdat, 
                    x=x, y=y, 
                    ax=ax_raw, 
                    alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple)==2:
            if paired is False:
                # Plot the CIs on the contrast axes.
                plotbootstrap(sw.collections[1],
                              bslist=tempbs,
                              ax=ax_contrast, 
                              violinWidth=violinWidth,
                              violinOffset=violinOffset,
                              markersize=summaryMarkerSize,
                              marker=summaryMarkerType,
                              offset=floatContrast,
                              color=violinColor,
                              linewidth=1)
            else:
                bootsDelta = bootstrap(
                    plotdat[current_tuple[1]]-plotdat[current_tuple[0]],
                    statfunction=statfunction,
                    smoothboot=smoothboot,
                    alpha_level=alpha_level,
                    reps=reps)
                contrastList.append(bootsDelta)
                contrastListNames.append(current_tuple[1]+' vs. '+current_tuple[0])
                summDelta = bootsDelta['summary']
                lowDelta = bootsDelta['bca_ci_low']
                highDelta = bootsDelta['bca_ci_high']

                if floatContrast:
                    xpos=0.375
                else:
                    xpos=0.25

                # Plot the summary measure.
                ax_contrast.plot(xpos, bootsDelta['summary'],
                         marker=summaryMarkerType,
                         markerfacecolor='k',
                         markersize=summaryMarkerSize,
                         alpha=0.75
                        )
                # Plot the CI.
                ax_contrast.plot([xpos, xpos],
                         [lowDelta, highDelta],
                         color='k',
                         alpha=0.75,
                         # linewidth=1,
                         linestyle='solid'
                        )
                
                # Plot the violin-plot.
                v = ax_contrast.violinplot(bootsDelta['stat_array'], [xpos], 
                                           widths = violinWidth, 
                                           showextrema = False, 
                                           showmeans = False)
                halfviolin(v, half = 'right', color = 'k')

            if floatContrast:
                # Set reference lines
                if paired is False:
                    ## First get leftmost limit of left reference group
                    xtemp, _=np.array(sw.collections[0].get_offsets()).T
                    leftxlim=xtemp.min()
                    ## Then get leftmost limit of right test group
                    xtemp, _=np.array(sw.collections[1].get_offsets()).T
                    rightxlim=xtemp.min()
                    ref=tempbs['summary']
                else:
                    leftxlim=0
                    rightxlim=0.25
                    ref=bootsDelta['summary']
                    ax_contrast.set_xlim(-0.25, 0.5) # does this work?

                ## zero line
                ax_contrast.hlines(0,                   # y-coordinates
                                leftxlim, 3.5,       # x-coordinates, start and end.
                                linestyle=contrastZeroLineStyle,
                                linewidth=1,
                                color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(ref, 
                                rightxlim, 3.5,        # x-coordinates, start and end.
                                linestyle=contrastEffectSizeLineStyle,
                                linewidth=1,
                                color=contrastEffectSizeLineColor)


                if paired is False:
                    es=float(tempbs['summary'])
                    refSum=tempbs['statistic_ref']
                else:
                    es=float(bootsDelta['summary'])
                    refSum=statfunction(plotdat[current_tuple[0]])
                ## If the effect size is positive, shift the right axis up.
                if es>0:
                    rightmin=ax_raw.get_ylim()[0]-es
                    rightmax=ax_raw.get_ylim()[1]-es
                ## If the effect size is negative, shift the right axis down.
                elif es<0:
                    rightmin=ax_raw.get_ylim()[0]+es
                    rightmax=ax_raw.get_ylim()[1]+es
                ax_contrast.set_ylim(rightmin, rightmax)

                if gsIdx>0:
                    ax_contrast.set_ylabel('')
                align_yaxis(ax_raw, refSum, ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)

                if paired is False:
                    # Set xlims so everything is properly visible!
                    swarm_xbounds=ax_raw.get_xbound()
                    ax_contrast.set_xbound(swarm_xbounds[0] -(summaryLineWidth * 1.1), 
                        swarm_xbounds[1] + (summaryLineWidth * 1.1))
                else:
                    ax_contrast.set_xlim(-0.05,0.25+violinWidth)

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(
                bslist=bscontrast,
                ax=ax_contrast,
                violinWidth=violinWidth,
                violinOffset=violinOffset,
                markersize=summaryMarkerSize,
                marker=summaryMarkerType,
                linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx>0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList=pd.DataFrame(contrastList).T
    contrastList.columns=contrastListNames

    # Get number of axes in figure for aesthetic tweaks.
    axesCount=len(fig.get_axes())
    for i in range(0, axesCount, 2):
        # Set new tick labels.
        # The tick labels belong to the SWARM axes
        # for both floating and non-floating plots.
        # This is because `sharex` was invoked.
        axx=fig.axes[i]
        newticklabs=list()
        for xticklab in axx.xaxis.get_ticklabels():
            t=xticklab.get_text()
            if paired:
                N=str(counts)
            else:
                N=str(counts.ix[t])

            if showGroupCount:
                newticklabs.append(t+' n='+N)
            else:
                newticklabs.append(t)
            axx.set_xticklabels(
                newticklabs,
                rotation=tickAngle,
                horizontalalignment=tickAlignment)

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx=fig.axes[i]

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if i==0:
            drawback_y(axx)

        if i!=axesCount-2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(False)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)
        else:
            drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(Line2D(
                (axx.xaxis.get_view_interval()[0], 
                    axx.xaxis.get_view_interval()[1]), 
                (0,0),
                color='black', linewidth=0.75
                )
            )
        
        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i==axesCount-2: # the last (rightmost) swarm axes.
                axx.legend(loc='top right',
                    bbox_to_anchor=(1.1,1.0),
                    fontsize=legendFontSize,
                    **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j,i in enumerate(range(1, axesCount, 2)):
        axx=fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright=axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                xmin=xleft-1, 
                xmax=xright+1,
                linestyle=contrastZeroLineStyle,
                linewidth=0.75,
                color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes, only is axesCount is 2.
                    # Not entirely sure why I have to do this.
                    if axesCount==2:
                        drawback_y(axx)

            sns.despine(ax=axx, 
                top=True, right=True, 
                left=False, bottom=False, 
                trim=True)
            if j==0 and axesCount==2:
                # Draw back x-axis lines connecting ticks.
                drawback_x(axx)
            # Rotate tick labels.
            rotateTicks(axx,tickAngle,tickAlignment)

        elif floatContrast is True:
            if paired is True:
                # Get the bootstrapped contrast range.
                lower=np.min(contrastList.ix['stat_array',j])
                upper=np.max(contrastList.ix['stat_array',j])
            else:
                lower=np.min(contrastList.ix['diffarray',j])
                upper=np.max(contrastList.ix['diffarray',j])
            meandiff=contrastList.ix['summary', j]

            ## Make sure we have zero in the limits.
            if lower>0:
                lower=0.
            if upper<0:
                upper=0.

            ## Get the tick interval from the left y-axis.
            leftticks=fig.get_axes()[i-1].get_yticks()
            tickstep=leftticks[1] -leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1=axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2=list()
            for a,b in enumerate(newticks1):
                if (b >= lower and b <= upper):
                    # if the tick lies within upper and lower, take it.
                    newticks2.append(b)
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2)<meandiff:
                ind=np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1.
                newticks2.append( newticks1[ind+1] )
            elif meandiff<np.min(newticks2):
                ind=np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1.
                newticks2.append( newticks1[ind-1] )
            newticks2=np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))
            
            ## Despine the axes.
            sns.despine(ax=axx, trim=True, 
                bottom=False, right=False,
                left=True, top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots.
    if (axesCount>2 and 
        contrastShareY is True and 
        floatContrast is False):

        # Set contrast ylim as max ticks of leftmost swarm axes.
        if contrastYlim is None:
            lower=list()
            upper=list()
            for c in range(0,len(contrastList.columns)):
                lower.append( np.min(contrastList.ix['bca_ci_low',c]) )
                upper.append( np.max(contrastList.ix['bca_ci_high',c]) )
            lower=np.min(lower)
            upper=np.max(upper)
        else:
            lower=contrastYlim[0]
            upper=contrastYlim[1]

        normalizeContrastY(fig, 
            contrast_ylim = contrastYlim, 
            show_all_yaxes = showAllYAxes)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)

    else:    
        # Tight Layout!
        gsMain.tight_layout(fig)
    
    # And we're all done.
    rcdefaults() # restore matplotlib defaults.
    sns.set() # restore seaborn defaults.
    return fig, contrastList
import numpy as np
import seaborn.apionly as sns
import matplotlib.pyplot as plt

# Apply seaborn's "whitegrid" style with the "notebook" context to later plots.
sns.set(style="whitegrid", context="notebook")

# Load seaborn's bundled iris example dataset.
# NOTE(review): iris2 is not referenced by the code visible below — confirm it is still needed.
iris2 = sns.load_dataset('iris')


def covariance(X, Y):
    """Return the sample covariance of two equally long sequences.

    The sum of the products of each pair's deviations from the respective
    means is divided by ``n - 1`` (the unbiased estimator).

    Parameters
    ----------
    X, Y : sequences of numbers
        Paired observations; both must support ``len()`` and have the same
        length. At least two observations are needed for the ``n - 1``
        divisor to be meaningful.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` differ in length. Without this check ``zip`` would
        silently drop the unmatched tail while the means and the divisor
        would still be computed from the full sequences, producing a
        plausible-looking but wrong result.
    """
    if len(X) != len(Y):
        raise ValueError(
            'X and Y must have the same length, got {} and {}.'.format(
                len(X), len(Y)))

    xhat = np.mean(X)
    yhat = np.mean(Y)
    epsilon = sum((x - xhat) * (y - yhat) for x, y in zip(X, Y))
    return epsilon / (len(X) - 1)


# Test the result against the existing NumPy function. np.cov prints the full
# covariance matrix, so the value to compare with is its off-diagonal entry.
print("My covariance function: {}".format(covariance([1, 3, 4], [1, 0, 2])))
print("Numpy covariance function: {}".format(np.cov([1, 3, 4], [1, 0, 2])))


def correlation(X, Y):
    """Return the sample correlation coefficient of X and Y.

    Computed as covariance(X, Y) divided by the product of the two sample
    standard deviations.
    """
    shared_variation = covariance(X, Y)
    # ddof=1 selects the unbiased (n - 1) standard deviation.
    spread_x = np.std(X, ddof=1)
    spread_y = np.std(Y, ddof=1)
    return shared_variation / (spread_x * spread_y)


# Sanity check against NumPy. np.corrcoef prints the full correlation matrix,
# so the value to compare with is its off-diagonal entry.
print("My Correlation: {}".format(correlation([1, 1, 4, 3], [1, 0, 2, 2])))
print("Numpy corrcoef: {}".format(np.corrcoef([1, 1, 4, 3], [1, 0, 2, 2])))
# Exploratory script over the Boston housing data (reads like a notebook
# export: bare expressions such as `dat.head()` produce no output when this
# is run as a plain script).
# NOTE(review): `datasets` is presumably sklearn.datasets (import not visible
# here); load_boston was removed in scikit-learn 1.2 — confirm the version.
boston = datasets.load_boston()
dat = pd.DataFrame(boston.data, columns=boston.feature_names)
dat.head()

# Target values as their own single-column ("MEDV") DataFrame.
target = pd.DataFrame(boston.target, columns=["MEDV"])
target.head()

# Features and target side by side in one frame for exploration.
df = dat.copy()
df = pd.concat([df, target], axis=1)
df.head()

df.info()
df.describe()

# NOTE(review): `snsapi` is not defined in the visible code — presumably an
# alias for seaborn.apionly; confirm.
snsapi.set()
# One histogram per column.
df.hist(bins = 10, figsize = (15,10));
plt.show();

# Pairwise correlations; the 'MEDV' column holds each column's correlation
# with the target.
corr_matrix = df.corr()
corr_matrix['MEDV']

sns.heatmap(corr_matrix);
plt.show()

print(boston['DESCR'])

# Subset of the feature columns used as model input.
dat1 = df.loc[:, ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']]

# Hold out 20% of the rows with a fixed seed; flatten y_train to 1-D.
# NOTE(review): `train_test_split` is presumably sklearn.model_selection's — import not visible here.
X_train, X_test, y_train, y_test = train_test_split(dat1, target, test_size = 0.2, random_state=42)
y_train = y_train.values.ravel()
def run_predict_structure(generator=None, title=None):
    constraints = {'edge_count': (1000, 1100)}

    accuracy_at_k = [0] * 5

    if generator != None and title != None:
        samples = 100
        for sample in xrange(samples):
            G = structural_identities.constrained_generation(
                generator, constraints)
            cluster, types = predict_structure(G, trials=20)

            print sample, types[cluster.index(min(cluster))]

            array = np.array(cluster)
            order = array.argsort()
            ranks = order.argsort().tolist()

            k = -1
            for i in xrange(len(cluster)):  # 5 types of rg
                if title == types[ranks.index(i)]:
                    k = i
                    break

            j = len(cluster) - 1
            while j >= k:
                accuracy_at_k[j] += 1
                j -= 1

        plt.figure(1)

        for i in xrange(len(accuracy_at_k)):
            accuracy_at_k[i] /= (samples * 1.0)

        plt.plot([i for i in xrange(1, 6)], accuracy_at_k, marker='o')
        plt.xlabel('k (top k labels)')
        plt.ylim((0, 1.1))
        plt.ylabel('Accuracy @ k')
        plt.title('Prediction Accuracy for ' + title + ' Random Graphs')

        plt.show()

    # Uniformly sample across rg
    elif generator == None:
        confusion_matrix = [[0 for i in xrange(5)] for j in xrange(5)]
        samples = 100
        index = [
            'Watts Strogatz', 'Geometric', 'Erdos Renyi', 'Barabasi Albert',
            'Planted Partition Model'
        ]
        constraints_enforced = True
        rgs = [
            structural_identities.watts_strogatz_generator,
            structural_identities.geometric_generator,
            structural_identities.erdos_renyi_generator,
            structural_identities.barabasi_albert_generator,
            structural_identities.planted_partition_generator
        ]

        for j, rg in enumerate(rgs):
            title = index[j]
            actual = j
            for i in xrange(samples):
                G = structural_identities.constrained_generation(
                    rg, constraints)

                cluster, types = predict_structure(G, 5, constraints_enforced)

                predicted = cluster.index(min(cluster))
                print title, types[predicted]

                confusion_matrix[actual][predicted] += 1

                array = np.array(cluster)
                order = array.argsort()
                ranks = order.argsort().tolist()

                k = -1
                for i in xrange(len(cluster)):  # 5 types of rg
                    if title == types[ranks.index(i)]:
                        k = i
                        break

                j = len(cluster) - 1
                while j >= k:
                    accuracy_at_k[j] += 1
                    j -= 1

        small_index = ['WS', 'Geo', 'ER', 'BA', 'PPM']

        for i in xrange(len(accuracy_at_k)):
            accuracy_at_k[i] /= (samples * 1.0 * len(rgs))

        print accuracy_at_k

        if constraints_enforced:
            plt.plot([i for i in xrange(1, 6)],
                     accuracy_at_k,
                     marker='o',
                     color='red')
        else:
            plt.plot([i for i in xrange(1, 6)], accuracy_at_k, marker='o')
        plt.xlabel('k (top k labels)')
        plt.ylim((0, 1.1))
        plt.ylabel('Accuracy @ k')
        plt.title('Prediction Accuracy for Uniformly Sampled Random Graphs')

        plt.show()

        sns.set()
        ax = plt.axes()
        sns.heatmap(confusion_matrix,
                    ax=ax,
                    cmap="YlGnBu",
                    yticklabels=index,
                    xticklabels=small_index)
        ax.set_title('Confusion Matrix for Uniformly Sampled Random Graphs')
        plt.tight_layout()
        plt.show()
Esempio n. 13
0
#path = r'/home/prithvi/git_work/GetOldTweets-python'
# Collect every bitcoinTweets_*.txt file from the current working directory.
path = os.getcwd()
allFiles = glob.glob(path + "/bitcoinTweets_*.txt")
frame = pd.DataFrame()

# Read each '::'-separated, header-less file and tag its rows with a number
# derived from the file's position in the glob result (first file -> 16).
# NOTE(review): glob.glob does not guarantee an ordering, so which file gets
# which number can differ between runs/platforms — confirm whether the list
# should be sorted first.
list_ = []
for i, file_ in enumerate(allFiles):
    df = pd.read_csv(file_, sep='::', engine='python', header=None)
    df['month'] = i + 16
    list_.append(df)
frame = pd.concat(list_)
# Columns are renamed positionally, so the 'month' column added above is
# renamed too.
# NOTE(review): presumably each file has exactly two fields, making 'month'
# the third column and hence 'date' — confirm against the data.
frame.columns = ['tweets', 'sentiment', 'date']
#print frame

frame['sentiment'] = frame['sentiment'].astype('category')
sns.set()

plt.figure()

# Count of rows per sentiment value, one bar per 'date' value.
sns.countplot(x='sentiment', hue='date', data=frame, palette="Greens_d")
#grouped = frame.groupby('month')
'''
for name,group in grouped:
    print name
    print group
    group.hist(by='month', column='fare')
'''
#frame.hist(by='month', column='sentiment')
plt.show()
#frame.sentiment.groupby('month').value_counts().plot.bar(stacked=True)
#plt.show()
Esempio n. 14
0
# NOTE: this import rebinds the name `set` in this module, shadowing the
# builtin `set` for any code that runs here afterwards.
from seaborn.apionly import set
# Call seaborn's set() with its default arguments when the package is imported.
set()

# Re-export the public names of the sibling modules at package level.
from .grids import *
from .miscplots import *

__version__ = "0.0.0"
Esempio n. 15
0
def _hide_top_right_spines(ax):
    """Hide the top/right spines and keep ticks on the left/bottom only."""
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')


def _plot_density_scatter(df, x_name, y_name):
    """Scatter `x_name` against `y_name`, coloured by a Gaussian KDE estimate
    of the point density."""
    fig, ax1 = plt.subplots(nrows=1)

    # Work on plain arrays so the reordering below is purely positional and
    # does not depend on the DataFrame's index labels.
    x = df[x_name].values
    y = df[y_name].values

    # Kernel density estimation is a non-parametric way to estimate the
    # probability density function (PDF); evaluate it at every point.
    xy = np.vstack([x, y])
    z = gaussian_kde(xy)(xy)

    # Sort the points by density, so that the densest points are plotted last.
    idx = z.argsort()
    x, y, z = x[idx], y[idx], z[idx]

    # 'none' draws the markers without an edge.
    pdf = ax1.scatter(x, y, c=z, s=50, edgecolor='none')

    ax1.set_title(x_name + ' by ' + y_name)
    _hide_top_right_spines(ax1)

    # Pad the data range by one unit on every side.
    ax1.set_xlim(df[x_name].min() - 1, df[x_name].max() + 1)
    ax1.set_ylim(df[y_name].min() - 1, df[y_name].max() + 1)

    ax1.set_xlabel(x_name)
    ax1.set_ylabel(y_name)

    colbar = fig.colorbar(pdf, ax=ax1)
    colbar.set_label('Probability Density Function')
    plt.show()


def _plot_time_series(df, column):
    """Line plot of `column` against the '#Frame' column."""
    fig, ax1 = plt.subplots(nrows=1)
    ax1.plot(df['#Frame'], df[column])
    ax1.set_title(column + ' by Time')
    _hide_top_right_spines(ax1)
    ax1.set_xlabel('Time (ps)')
    ax1.set_xlim(df['#Frame'].min() - 1, df['#Frame'].max() + 1)
    ax1.set_ylabel(column)
    plt.show()


def _plot_joint_kde(df, x_name, y_name):
    """Joint KDE distribution plot of two columns using seaborn."""
    import seaborn.apionly as sns
    sns.set(style='white')
    sns.jointplot(x=x_name,
                  y=y_name,
                  data=df,
                  kind="kde",
                  space=0,
                  color="b")
    plt.show()


def plottingData():
    """Plot columns of a tab-separated data file chosen via module-level `args`.

    Reads `args.data` (file name, resolved against the current working
    directory), `args.option` (one column name, or two separated by a comma)
    and `args.pdf`:

    * one column: line plot of that column against the '#Frame' column;
    * two columns and `args.pdf` falsy: scatter plot coloured by point density;
    * two columns and `args.pdf == 'kde'`: seaborn joint KDE plot.

    Any other combination plots nothing. Surrounding double quotes are
    stripped from the two column names in the two-column modes.
    """
    # `sep` alone is enough; `delimiter` is only an alias for it and newer
    # pandas versions reject a call that passes both.
    df = pd.read_csv(os.path.join(os.getcwd(), args.data),
                     sep='\t',
                     header=0)

    if ',' not in args.option:
        _plot_time_series(df, args.option)
        return

    # str.strip returns a new string, so its result has to be kept; the
    # earlier code discarded it and the quotes were never removed.
    numVar = [name.strip('"') for name in args.option.split(',')]
    x_name, y_name = numVar[0], numVar[1]

    if not args.pdf:
        _plot_density_scatter(df, x_name, y_name)
    elif args.pdf == 'kde':
        _plot_joint_kde(df, x_name, y_name)
def test_edge_imputation():
    """Evaluate structure prediction on edge-imputed random graphs and plot it.

    For each of the five generators in ``rgs``, ``samples`` graphs are drawn
    via ``structural_identities.constrained_generation``. For every drawn
    graph a configuration-model graph with an all-ones degree sequence is
    built, passed through ``impute_edge_algorithm`` together with the drawn
    graph, and classified with ``predict_structure``. The function then:

    * accumulates a confusion matrix (row = generator index, column = index
      of the smallest ``cluster`` score),
    * accumulates accuracy@k (the true label is among the k best-ranked
      labels),
    * calls the ``structural_identities`` distribution plots per generator,
    * shows the accuracy@k curve and the confusion-matrix heat map.

    The function takes no arguments and returns nothing; its results are the
    printed lines and the displayed figures.

    NOTE(review): the exact meaning of ``cluster`` (assumed: one score per
    graph type, lower is better) and ``types`` (assumed: type names aligned
    with ``cluster``) comes from ``predict_structure`` -- confirm there.
    """
    constraints = {'edge_count': (1000, 1100)}
    samples = 100
    constraints_enforced = False
    index = [
        'Watts Strogatz', 'Geometric', 'Erdos Renyi', 'Barabasi Albert',
        'Planted Partition Model'
    ]
    small_index = ['WS', 'Geo', 'ER', 'BA', 'PPM']
    rgs = [
        structural_identities.watts_strogatz_generator,
        structural_identities.geometric_generator,
        structural_identities.erdos_renyi_generator,
        structural_identities.barabasi_albert_generator,
        structural_identities.planted_partition_generator
    ]

    n_types = len(index)
    accuracy_at_k = [0] * n_types
    confusion_matrix = [[0] * n_types for _ in range(n_types)]

    for uni, rg in enumerate(rgs):
        title = index[uni]
        actual = uni
        created_graphs = []
        for _ in range(samples):
            G = structural_identities.constrained_generation(rg, constraints)

            degree_sequence = [1] * G.number_of_nodes()

            new_G = random_graphs.configuration_model(degree_sequence)
            new_G = impute_edge_algorithm(new_G, G)
            created_graphs.append(new_G)

            cluster, types = predict_structure(new_G, 2, constraints_enforced)

            predicted = cluster.index(min(cluster))
            # Single pre-formatted string: prints identically on Python 2
            # and Python 3 (the old ``print a, b`` statement is a syntax
            # error on Python 3).
            print("%s %s" % (title, types[predicted]))

            confusion_matrix[actual][predicted] += 1

            # ``order[r]`` is the position in ``cluster`` of the r-th
            # smallest score, so walking it visits labels from best to worst.
            order = np.array(cluster).argsort().tolist()
            true_rank = None
            for rank, type_pos in enumerate(order):
                if types[type_pos] == title:
                    true_rank = rank
                    break

            # The true label counts as a hit for every k at or beyond its
            # rank. When the label is absent from ``types`` nothing is
            # counted (previously a -1 sentinel wrapped around and
            # incremented the last bucket twice).
            if true_rank is not None:
                for k in range(true_rank, len(cluster)):
                    accuracy_at_k[k] += 1

        # HERE we plot distros
        observed_metrics, dic = structural_identities.analyze_structural_identity_graphs(
            created_graphs, uni)
        predict_metrics, dic = structural_identities.analyze_structural_identity(
            rg, samples, uni)  # constraints=None):
        structural_identities.graph_created_distributions(
            uni, observed_metrics, predict_metrics, dic)

    plt.figure(10)

    # Turn hit counts into fractions of all classified graphs.
    total_predictions = samples * 1.0 * len(rgs)
    accuracy_at_k = [hits / total_predictions for hits in accuracy_at_k]

    ks = list(range(1, len(accuracy_at_k) + 1))
    if constraints_enforced:
        plt.plot(ks, accuracy_at_k, marker='o', color='red')
    else:
        plt.plot(ks, accuracy_at_k, marker='o')

    plt.xlabel('k (top k labels)')
    plt.ylim((0, 1.1))
    plt.ylabel('Accuracy @ k')
    plt.title('Prediction Accuracy for Uniformly Sampled Random Graphs')

    plt.show()

    sns.set()
    ax = plt.axes()
    sns.heatmap(confusion_matrix,
                ax=ax,
                cmap="YlGnBu",
                yticklabels=index,
                xticklabels=small_index)
    ax.set_title('Confusion Matrix for Uniformly Sampled Random Graphs')
    plt.tight_layout()
    plt.show()
Esempio n. 17
0
def plot_scree(clf_pca,
               xlim=[-1, 10],
               ylim=[-0.1, 1.0],
               required_var=0.90,
               figsize=(10, 5)):
    """Create side-by-side scree plots for analyzing variance of principal
    components from PCA.

    The left subplot shows the proportion of variance explained by each
    component, the right subplot the cumulative proportion. Points are drawn
    at their 0-based array positions (``plt.plot`` is called without explicit
    x values).

    Parameters
    ----------
    clf_pca : sklearn.decomposition.PCA
        A fitted scikit-learn PCA model.
    xlim : list
        X-axis range. If `required_var` is supplied, the maximum x-axis value
        will automatically be set so that the required variance line is visible
        on the plot. Defaults to [-1, 10]. The list is only read, never
        mutated.
    ylim : list
        Y-axis range. Defaults to [-0.1, 1.0]. The list is only read, never
        mutated.
    required_var : float, int, None
        A value of variance to distinguish on the scree plot. Set to None to
        not include on the plot. Defaults to 0.90. If the cumulative variance
        of the fitted components never reaches this value, no marker is
        drawn.
    figsize : tuple
        A tuple indicating the size of the plot to be created, with format
        (x-axis, y-axis). Defaults to (10, 5).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.

    Raises
    ------
    AssertionError
        If `clf_pca` is not an instance of sklearn.decomposition.PCA.
    """
    # Ensure we have a PCA model
    assert isinstance(clf_pca, decomposition.PCA), (
        "Models of type {0} are not supported. Only models of type "
        "sklearn.decomposition.PCA are supported.".format(type(clf_pca)))
    # Extract variances from the model
    variances = clf_pca.explained_variance_ratio_
    # Set plot style and scale up font size in ONE call: sns.set() re-applies
    # its own default style, so calling it after sns.set_style("whitegrid")
    # would silently discard the whitegrid style.
    sns.set(style="whitegrid", font_scale=1.2)
    # Set up figure and generate subplots; seaborn's global settings are
    # restored in ``finally`` whether or not plotting succeeds.
    try:
        fig = plt.figure('scree', figsize=figsize)
        # First plot (in subplot)
        plt.subplot(1, 2, 1)
        plt.xlabel("Component Number")
        plt.ylabel("Proportion of Variance Explained")
        plt.xlim(xlim)
        plt.ylim(ylim)
        plt.plot(variances, marker='o', linestyle='--')
        # Second plot (in subplot)
        cumsum = np.cumsum(variances)  # Cumulative sum of variances explained
        plt.subplot(1, 2, 2)
        plt.xlabel("Number of Components")
        plt.ylabel("Proportion of Variance Explained")
        plt.xlim(xlim)
        plt.ylim(ylim)
        plt.plot(cumsum, marker='o', linestyle='--')
        # Add marker for required variance line
        if required_var is not None:
            reached = cumsum >= required_var
            # np.argmax on an all-False mask returns 0, which would wrongly
            # report "1 component"; only draw the marker when the threshold
            # is actually reached.
            if reached.any():
                required_var_components = np.argmax(reached) + 1
                # Update xlim if it is too small to see the marker
                if xlim[1] <= required_var_components:
                    plt.xlim([xlim[0], required_var_components + 1])
                # Add the marker and legend to the plot
                plt.axvline(x=required_var_components,
                            c='r',
                            linestyle='dashed',
                            label="> {0:.0f}% Var. Explained: {1} "
                            "components".format(required_var * 100,
                                                required_var_components))
                legend = plt.legend(loc='lower right', frameon=True)
                legend.get_frame().set_facecolor('#FFFFFF')
        plt.show()
    finally:
        sns.reset_orig()
    return fig
Esempio n. 18
0
def pairedcontrast(data,
                   x,
                   y,
                   idcol,
                   reps=3000,
                   statfunction=None,
                   idx=None,
                   figsize=None,
                   beforeAfterSpacer=0.01,
                   violinWidth=0.005,
                   floatOffset=0.05,
                   showRawData=False,
                   showAllYAxes=False,
                   floatContrast=True,
                   smoothboot=False,
                   floatViolinOffset=None,
                   showConnections=True,
                   summaryBar=False,
                   contrastYlim=None,
                   swarmYlim=None,
                   barWidth=0.005,
                   rawMarkerSize=8,
                   rawMarkerType='o',
                   summaryMarkerSize=10,
                   summaryMarkerType='o',
                   summaryBarColor='grey',
                   meansSummaryLineStyle='solid',
                   contrastZeroLineStyle='solid',
                   contrastEffectSizeLineStyle='solid',
                   contrastZeroLineColor='black',
                   contrastEffectSizeLineColor='black',
                   pal=None,
                   legendLoc=2,
                   legendFontSize=12,
                   legendMarkerScale=1,
                   axis_title_size=None,
                   yticksize=None,
                   xticksize=None,
                   tickAngle=45,
                   tickAlignment='right',
                   **kwargs):
    """Plot paired (before/after) observations with a bootstrapped difference.

    For every pair of levels in `idx`, the raw values of the two groups are
    drawn, matched observations are optionally joined by lines, and the
    bootstrap distribution of the paired difference (after - before) is drawn
    as a half violin with its summary value and confidence interval.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format data. Rows containing NaN are dropped first.
    x, y : str
        Column holding the group level, and column holding the measured
        value.
    idcol : str
        Column identifying the paired subject; used as the pivot index.
    reps, smoothboot : forwarded to ``bootstrap``.
    statfunction : callable, optional
        Summary statistic; defaults to ``np.mean``.
    idx : tuple, optional
        Either one pair of level names, or a tuple of such pairs (one
        subplot column per pair). Defaults to the first two unique levels of
        ``data[x]``. On a malformed `idx` a message is printed and
        ``sys.exit(0)`` is called.
    figsize : tuple, optional
        Figure size; defaults to (6, 6), or (12, 12/sqrt(2)) when `idx`
        holds more than two pairs.
    beforeAfterSpacer : float
        Horizontal gap between the groups (forced to 1 when `showRawData`).
    violinWidth, floatViolinOffset, barWidth : float
        Geometry of the difference violin / summary bars. `violinWidth` is
        overridden when `showRawData` is True; `floatViolinOffset` defaults
        to ``beforeAfterSpacer / 2``.
    showRawData : bool
        True draws a visible swarm plot; otherwise a strip plot is drawn and
        then hidden (its point positions are still used).
    showConnections, summaryBar, showAllYAxes, floatContrast : bool
        Toggle the pairing lines, the mean bars, the y-axes of non-first
        contrast plots (non-floating layout only), and whether the contrast
        shares the raw-data axes (``twinx``) or gets its own row.
    contrastYlim, swarmYlim : sequence of two floats, optional
        Y-limits for the contrast and raw-data axes.
    pal : dict, optional
        Level -> color mapping; built from the seaborn palette if omitted.
    legendLoc, legendFontSize, legendMarkerScale :
        Legend settings, used only when ``hue`` is given in `kwargs`.
    axis_title_size, yticksize, xticksize, tickAngle, tickAlignment :
        Label and tick formatting.
    floatOffset, summaryMarkerType, meansSummaryLineStyle,
    contrastEffectSizeLineStyle, contrastEffectSizeLineColor :
        Accepted for interface compatibility; not read by this function.
    **kwargs :
        Forwarded to ``sns.swarmplot`` / ``sns.stripplot``. ``color`` is set
        to ``'k'`` when neither ``color`` nor ``hue`` is supplied.

    Returns
    -------
    fig : matplotlib.figure.Figure
    contrastList : pandas.DataFrame
        One column per contrast, named ``"<after> v.s. <before>"``, built
        from the ``bootstrap`` results plus a ``'ttest_pval'`` entry.

    Notes
    -----
    Matplotlib rc settings are changed while plotting and reset with
    ``rcdefaults()`` and ``sns.set()`` before returning.
    """
    # Preliminaries.
    data = data.dropna()

    # plot params
    if axis_title_size is None:
        axis_title_size = 15
    if yticksize is None:
        yticksize = 12
    if xticksize is None:
        xticksize = 12

    axisTitleParams = {'labelsize': axis_title_size}
    xtickParams = {'labelsize': xticksize}
    ytickParams = {'labelsize': yticksize}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)

    ## If `idx` is not specified, just take the FIRST TWO levels alphabetically.
    if idx is None:
        # Wrap the pair in an outer tuple so the loop below sees ONE
        # (before, after) pair instead of iterating over the two level names.
        idx = (tuple(np.unique(data[x])[0:2]), )
    else:
        # check if multi-plot or not
        if all(isinstance(element, str) for element in idx):
            # if idx is supplied but not a multiplot (ie single list or tuple)
            if len(idx) != 2:
                print(idx, "does not have length 2.")
                sys.exit(0)
            else:
                idx = (tuple(idx, ), )
        elif all(isinstance(element, tuple) for element in idx):
            # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
            # Collect offenders explicitly: a generator-expression variable
            # is not visible outside the expression, so it cannot be
            # referenced in the message.
            bad_pairs = [element for element in idx if len(element) != 2]
            if bad_pairs:
                # If any of the tuples do not contain exactly 2 elements.
                print(bad_pairs[0], "does not have length 2.")
                sys.exit(0)
    if floatViolinOffset is None:
        floatViolinOffset = beforeAfterSpacer / 2
    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])
    if swarmYlim is not None:
        swarmYlim = np.array([swarmYlim[0], swarmYlim[1]])

    ## Here we define the palette on all the levels of the 'x' column.
    ## Thus, if the same pandas dataframe is re-used across different plots,
    ## the color identity of each group will be maintained.
    ## Set palette based on total number of categories in data['x'] or data['hue_column']
    if 'hue' in kwargs:
        u = kwargs['hue']
    else:
        u = x
    if ('color' not in kwargs and 'hue' not in kwargs):
        kwargs['color'] = 'k'

    if pal is None:
        pal = dict(
            zip(data[u].unique(),
                sns.color_palette(n_colors=len(data[u].unique()))))

    # Initialise figure.
    if figsize is None:
        if len(idx) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (6, 6)
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec based on `levs_tuple` shape.
    gsMain = gridspec.GridSpec(
        1,
        np.shape(idx)[0])  # 1 row; columns based on number of tuples in tuple.
    # Set default statfunction
    if statfunction is None:
        statfunction = np.mean
    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()

    ## Pivot the data to get before and after values per subject.
    ## This does not depend on the current pair, so compute it once.
    data_pivot = data.pivot_table(index=idcol, columns=x, values=y)

    for gsIdx, xlevs in enumerate(idx):
        # Start plotting!!
        if floatContrast is True:
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            gsSubGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx])
            ax_raw = plt.Subplot(fig, gsSubGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig,
                                      gsSubGridSpec[1, 0],
                                      sharex=ax_raw,
                                      frame_on=False)

        ## Plot raw data as swarmplot or stripplot.
        if showRawData is True:
            swarm_raw = sns.swarmplot(data=data,
                                      x=x,
                                      y=y,
                                      order=xlevs,
                                      ax=ax_raw,
                                      palette=pal,
                                      size=rawMarkerSize,
                                      marker=rawMarkerType,
                                      **kwargs)
        else:
            swarm_raw = sns.stripplot(data=data,
                                      x=x,
                                      y=y,
                                      order=xlevs,
                                      ax=ax_raw,
                                      palette=pal,
                                      **kwargs)
        swarm_raw.set_ylim(swarmYlim)

        ## Get some details about the raw data.
        maxXBefore = max(swarm_raw.collections[0].get_offsets().T[0])
        minXAfter = min(swarm_raw.collections[1].get_offsets().T[0])
        if showRawData is True:
            #beforeAfterSpacer = (getSwarmSpan(swarm_raw, 0) + getSwarmSpan(swarm_raw, 1))/2
            beforeAfterSpacer = 1
        xposAfter = maxXBefore + beforeAfterSpacer
        xAfterShift = minXAfter - xposAfter

        ## shift the after swarmpoints closer for aesthetic purposes.
        offsetSwarmX(swarm_raw.collections[1], -xAfterShift)

        ## pandas DataFrame of 'before' group
        x1 = pd.DataFrame({
            str(xlevs[0] + '_x'):
            pd.Series(swarm_raw.collections[0].get_offsets().T[0]),
            xlevs[0]:
            pd.Series(swarm_raw.collections[0].get_offsets().T[1]),
            '_R_':
            pd.Series(swarm_raw.collections[0].get_facecolors().T[0]),
            '_G_':
            pd.Series(swarm_raw.collections[0].get_facecolors().T[1]),
            '_B_':
            pd.Series(swarm_raw.collections[0].get_facecolors().T[2]),
        })
        ## join the RGB columns into a tuple, then assign to a column.
        x1['_hue_'] = x1[['_R_', '_G_', '_B_']].apply(tuple, axis=1)
        # Sort the plotted points and the pivoted data by value so that the
        # subject ids can be attached to the plotted points by position.
        x1 = x1.sort_values(by=xlevs[0])
        x1.index = data_pivot.sort_values(by=xlevs[0]).index

        ## pandas DataFrame of 'after' group
        ### create convenient signifiers for column names.
        befX = str(xlevs[0] + '_x')
        aftX = str(xlevs[1] + '_x')

        x2 = pd.DataFrame({
            aftX:
            pd.Series(swarm_raw.collections[1].get_offsets().T[0]),
            xlevs[1]:
            pd.Series(swarm_raw.collections[1].get_offsets().T[1])
        })
        x2 = x2.sort_values(by=xlevs[1])
        x2.index = data_pivot.sort_values(by=xlevs[1]).index

        ## Join x1 and x2, on both their indexes.
        plotPoints = x1.merge(x2,
                              left_index=True,
                              right_index=True,
                              how='outer')

        ## Add the hue column if hue argument was passed.
        if 'hue' in kwargs:
            h = kwargs['hue']
            plotPoints[h] = data.pivot(index=idcol, columns=x,
                                       values=h)[xlevs[0]]
            swarm_raw.legend(loc=legendLoc,
                             fontsize=legendFontSize,
                             markerscale=legendMarkerScale)

        ## Plot the lines to join the 'before' points to their respective 'after' points.
        if showConnections is True:
            for i in plotPoints.index:
                # Label-based lookups; `.loc` replaces the `.ix` indexer
                # that was removed from pandas.
                ax_raw.plot(
                    [plotPoints.loc[i, befX], plotPoints.loc[i, aftX]],
                    [plotPoints.loc[i, xlevs[0]], plotPoints.loc[i, xlevs[1]]],
                    linestyle='solid',
                    color=plotPoints.loc[i, '_hue_'],
                    linewidth=0.75,
                    alpha=0.75)

        ## Hide the raw swarmplot data if so desired.
        if showRawData is False:
            swarm_raw.collections[0].set_visible(False)
            swarm_raw.collections[1].set_visible(False)

        if showRawData is True:
            #maxSwarmSpan = max(np.array([getSwarmSpan(swarm_raw, 0), getSwarmSpan(swarm_raw, 1)]))/2
            maxSwarmSpan = 0.5
        else:
            maxSwarmSpan = barWidth

        ## Plot Summary Bar.
        if summaryBar is True:
            # Calculate means
            means = data.groupby([x], sort=True).mean()[y]
            # # Calculate medians
            # medians = data.groupby([x], sort = True).median()[y]

            ## Draw summary bar.
            bar_raw = sns.barplot(x=means.index,
                                  y=means.values,
                                  order=xlevs,
                                  ax=ax_raw,
                                  ci=0,
                                  facecolor=summaryBarColor,
                                  alpha=0.25)
            ## Draw zero reference line.
            ax_raw.add_artist(
                Line2D((ax_raw.xaxis.get_view_interval()[0],
                        ax_raw.xaxis.get_view_interval()[1]), (0, 0),
                       color='black',
                       linewidth=0.75))

            ## get swarm with largest span, set as max width of each barplot.
            for i, bar in enumerate(bar_raw.patches):
                x_width = bar.get_x()
                width = bar.get_width()
                centre = x_width + width / 2.
                if i == 0:
                    bar.set_x(centre - maxSwarmSpan / 2.)
                else:
                    bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                bar.set_width(maxSwarmSpan)

        # Get x-extents of the swarm points and the 'before' summary.
        beforeRaw = pd.DataFrame(swarm_raw.collections[0].get_offsets())
        afterRaw = pd.DataFrame(swarm_raw.collections[1].get_offsets())
        before_leftx = min(beforeRaw[0])
        after_rightx = max(afterRaw[0])
        # Summary of the BEFORE group's y-values; the zero of the contrast
        # axis is aligned to it further below.
        before_stat_summary = statfunction(beforeRaw[1])

        # Calculate the summary difference and CI.
        plotPoints['delta_y'] = plotPoints[xlevs[1]] - plotPoints[xlevs[0]]
        plotPoints['delta_x'] = [0] * np.shape(plotPoints)[0]

        bootsDelta = bootstrap(plotPoints['delta_y'],
                               statfunction=statfunction,
                               smoothboot=smoothboot,
                               reps=reps)
        summDelta = bootsDelta['summary']
        lowDelta = bootsDelta['bca_ci_low']
        highDelta = bootsDelta['bca_ci_high']

        # set new xpos for delta violin.
        if floatContrast is True:
            if showRawData is False:
                xposPlusViolin = deltaSwarmX = after_rightx + floatViolinOffset
            else:
                xposPlusViolin = deltaSwarmX = after_rightx + maxSwarmSpan
        else:
            xposPlusViolin = xposAfter
        if showRawData is True:
            # If showRawData is True and floatContrast is True,
            # set violinwidth to the barwidth.
            violinWidth = maxSwarmSpan

        xmaxPlot = xposPlusViolin + violinWidth

        # Plot the summary measure.
        ax_contrast.plot(xposPlusViolin,
                         summDelta,
                         marker='o',
                         markerfacecolor='k',
                         markersize=summaryMarkerSize,
                         alpha=0.75)

        # Plot the CI.
        ax_contrast.plot([xposPlusViolin, xposPlusViolin],
                         [lowDelta, highDelta],
                         color='k',
                         alpha=0.75,
                         linestyle='solid')

        # Plot the violin-plot.
        v = ax_contrast.violinplot(bootsDelta['stat_array'], [xposPlusViolin],
                                   widths=violinWidth,
                                   showextrema=False,
                                   showmeans=False)
        halfviolin(v, half='right', color='k')

        # Remove left axes x-axis title.
        ax_raw.set_xlabel("")
        # Remove floating axes y-axis title.
        ax_contrast.set_ylabel("")

        # Set proper x-limits
        ax_raw.set_xlim(before_leftx - beforeAfterSpacer / 2, xmaxPlot)
        ax_raw.get_xaxis().set_view_interval(
            before_leftx - beforeAfterSpacer / 2,
            after_rightx + beforeAfterSpacer / 2)
        ax_contrast.set_xlim(ax_raw.get_xlim())

        if floatContrast is True:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            # Make sure they have the same y-limits.
            ax_contrast.set_ylim(ax_raw.get_ylim())

            # Drawing in the x-axis for ax_raw.
            ## Set the tick labels!
            ax_raw.set_xticklabels(xlevs,
                                   rotation=tickAngle,
                                   horizontalalignment=tickAlignment)
            # NOTE: the lowest y-value of ax_raw is read where needed below
            # and deliberately NOT stored in `y`: `y` is the value-column
            # name and is still needed for the remaining contrasts.

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, statfunction(plotPoints[xlevs[0]]),
                        ax_contrast, 0)

            # Add label to floating axes. But on ax_raw!
            ax_raw.text(x=deltaSwarmX,
                        y=ax_raw.get_yaxis().get_view_interval()[0],
                        horizontalalignment='left',
                        s='Difference',
                        fontsize=15)

            # Set reference lines
            ## zero line
            ax_contrast.hlines(
                0,  # y-coordinate
                ax_contrast.xaxis.get_majorticklocs()
                [0],  # x-coordinates, start and end.
                ax_raw.xaxis.get_view_interval()[1],
                linestyle='solid',
                linewidth=0.75,
                color='black')

            ## effect size line
            ax_contrast.hlines(summDelta,
                               ax_contrast.xaxis.get_majorticklocs()[1],
                               ax_raw.xaxis.get_view_interval()[1],
                               linestyle='solid',
                               linewidth=0.75,
                               color='black')

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, before_stat_summary, ax_contrast, 0.)
        else:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            fig.add_subplot(ax_raw)
            fig.add_subplot(ax_contrast)
        ax_contrast.set_ylim(contrastYlim)
        # Calculate p-values.
        # 1-sample t-test to see if the mean of the difference is different from 0.
        ttestresult = ttest_1samp(plotPoints['delta_y'], popmean=0)[1]
        bootsDelta['ttest_pval'] = ttestresult
        contrastList.append(bootsDelta)
        contrastListNames.append(str(xlevs[1]) + ' v.s. ' + str(xlevs[0]))

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Now we iterate thru the contrast axes to normalize all the ylims.
    # Axes were added as (raw, contrast) pairs, so contrast axes sit at the
    # odd positions of fig.get_axes().
    for j, i in enumerate(range(1, len(fig.get_axes()), 2)):
        axx = fig.get_axes()[i]
        ## Get max and min of the dataset.
        # Column j by position, rows by label (replaces the removed `.ix`).
        contrast_j = contrastList.iloc[:, j]
        lower = np.min(contrast_j['stat_array'])
        upper = np.max(contrast_j['stat_array'])
        meandiff = contrast_j['summary']

        ## Make sure we have zero in the limits.
        if lower > 0:
            lower = 0.
        if upper < 0:
            upper = 0.

        ## Get tick distance on raw axes.
        ## This will be the tick distance for the contrast axes.
        rawAxesTicks = fig.get_axes()[i - 1].yaxis.get_majorticklocs()
        rawAxesTickDist = rawAxesTicks[1] - rawAxesTicks[0]

        ## First re-draw of axis with new tick interval
        axx.yaxis.set_major_locator(MultipleLocator(rawAxesTickDist))
        newticks1 = fig.get_axes()[i].get_yticks()

        if floatContrast is False:
            if (showAllYAxes is False and i in range(2, len(fig.get_axes()))):
                axx.get_yaxis().set_visible(showAllYAxes)
            else:
                ## Obtain major ticks that comfortably encompass lower and upper.
                newticks2 = _pairedcontrast_ticks(newticks1, lower, upper,
                                                  meandiff)
                axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

                ## Draw zero reference line.
                axx.hlines(
                    y=0,
                    xmin=fig.get_axes()[i].get_xaxis().get_view_interval()[0],
                    xmax=fig.get_axes()[i].get_xaxis().get_view_interval()[1],
                    linestyle=contrastZeroLineStyle,
                    linewidth=0.75,
                    color=contrastZeroLineColor)

                sns.despine(ax=fig.get_axes()[i],
                            trim=True,
                            bottom=False,
                            right=True,
                            left=False,
                            top=True)

                ## Draw back the lines for the relevant y-axes.
                drawback_y(axx)

                ## Draw back the lines for the relevant x-axes.
                drawback_x(axx)

        elif floatContrast is True:
            ## Get the original ticks on the floating y-axis.
            newticks1 = fig.get_axes()[i].get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = _pairedcontrast_ticks(newticks1, lower, upper,
                                              meandiff)

            ## Re-draw the axis.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine and trim the axes.
            sns.despine(ax=axx,
                        trim=True,
                        bottom=False,
                        right=False,
                        left=True,
                        top=True)

    for i in range(0, len(fig.get_axes()), 2):
        # Loop through the raw data swarmplots and despine them appropriately.
        if floatContrast is True:
            sns.despine(ax=fig.get_axes()[i], trim=True, right=True)

        else:
            sns.despine(ax=fig.get_axes()[i],
                        trim=True,
                        bottom=True,
                        right=True)
            fig.get_axes()[i].get_xaxis().set_visible(False)

        # Draw back the lines for the relevant y-axes.
        ymin = fig.get_axes()[i].get_yaxis().get_majorticklocs()[0]
        ymax = fig.get_axes()[i].get_yaxis().get_majorticklocs()[-1]
        xleft, _ = fig.get_axes()[i].get_xaxis().get_view_interval()
        fig.get_axes()[i].add_artist(
            Line2D((xleft, xleft), (ymin, ymax), color='black',
                   linewidth=1.5))

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList


def _pairedcontrast_ticks(candidate_ticks, lower, upper, meandiff):
    """Return the sorted candidate ticks within [lower, upper], extended by
    one neighbouring tick when `meandiff` lies outside the kept ticks.

    `candidate_ticks` is expected to be the numpy array of tick locations
    currently on the contrast axis.
    """
    ticks = [tick for tick in candidate_ticks if lower <= tick <= upper]
    # if the meandiff falls outside of the kept ticks, add a tick in the right direction.
    if np.max(ticks) < meandiff:
        # find out the max tick index in candidate_ticks.
        ind = np.where(candidate_ticks == np.max(ticks))[0][0]
        ticks.append(candidate_ticks[ind + 1])
    elif meandiff < np.min(ticks):
        # find out the min tick index in candidate_ticks.
        ind = np.where(candidate_ticks == np.min(ticks))[0][0]
        ticks.append(candidate_ticks[ind - 1])
    ticks = np.array(ticks)
    ticks.sort()
    return ticks
Esempio n. 19
0
def contrastplot(data,
                 x=None,
                 y=None,
                 idx=None,
                 idcol=None,
                 alpha=0.75,
                 axis_title_size=None,
                 ci=95,
                 contrastShareY=True,
                 contrastEffectSizeLineStyle='solid',
                 contrastEffectSizeLineColor='black',
                 contrastYlim=None,
                 contrastZeroLineStyle='solid',
                 contrastZeroLineColor='black',
                 connectPairs=True,
                 effectSizeYLabel="Effect Size",
                 figsize=None,
                 floatContrast=True,
                 floatSwarmSpacer=0.2,
                 heightRatio=(1, 1),
                 lineWidth=2,
                 legend=True,
                 legendFontSize=14,
                 legendFontProps={},
                 paired=False,
                 pairedDeltaLineAlpha=0.3,
                 pairedDeltaLineWidth=1.2,
                 pal=None,
                 rawMarkerSize=8,
                 rawMarkerType='o',
                 reps=3000,
                 showGroupCount=True,
                 showCI=False,
                 showAllYAxes=False,
                 showRawData=True,
                 smoothboot=False,
                 statfunction=None,
                 summaryBar=False,
                 summaryBarColor='grey',
                 summaryBarAlpha=0.25,
                 summaryColour='black',
                 summaryLine=True,
                 summaryLineStyle='solid',
                 summaryLineWidth=0.25,
                 summaryMarkerSize=10,
                 summaryMarkerType='o',
                 swarmShareY=True,
                 swarmYlim=None,
                 tickAngle=45,
                 tickAlignment='right',
                 violinOffset=0.375,
                 violinWidth=0.2,
                 violinColor='k',
                 xticksize=None,
                 yticksize=None,
                 **kwargs):
    '''Takes a pandas DataFrame and produces a contrast plot:
    either a Cumming hub-and-spoke plot or a Gardner-Altman contrast plot.
    Paired and unpaired options available.

    Keyword arguments:
        data: pandas DataFrame
            
        x: string
            column name containing categories to be plotted on the x-axis.

        y: string
            column name containing values to be plotted on the y-axis.

        idx: tuple
            flexible declaration of groupwise comparisons.

        idcol: string
            for paired plots.

        alpha: float
            alpha (transparency) of raw swarmed data points.
            
        axis_title_size=None
        ci=95
        contrastShareY=True
        contrastEffectSizeLineStyle='solid'
        contrastEffectSizeLineColor='black'
        contrastYlim=None
        contrastZeroLineStyle='solid'
        contrastZeroLineColor='black'
        effectSizeYLabel="Effect Size"
        figsize=None
        floatContrast=True
        floatSwarmSpacer=0.2
        heightRatio=(1,1)
        lineWidth=2
        legend=True
        legendFontSize=14
        legendFontProps={}
        paired=False
        pairedDeltaLineAlpha=0.3
        pairedDeltaLineWidth=1.2
        pal=None
        rawMarkerSize=8
        rawMarkerType='o'
        reps=3000
        showGroupCount=True
        showCI=False
        showAllYAxes=False
        showRawData=True
        smoothboot=False
        statfunction=None
        summaryBar=False
        summaryBarColor='grey'
        summaryBarAlpha=0.25
        summaryColour='black'
        summaryLine=True
        summaryLineStyle='solid'
        summaryLineWidth=0.25
        summaryMarkerSize=10
        summaryMarkerType='o'
        swarmShareY=True
        swarmYlim=None
        tickAngle=45
        tickAlignment='right'
        violinOffset=0.375
        violinWidth=0.2
        violinColor='k'
        xticksize=None
        yticksize=None

    Returns:
        A matplotlib Figure.
        Organization of figure Axes.
    '''

    # Check that `data` is a pandas dataframe
    if 'DataFrame' not in str(type(data)):
        raise TypeError(
            "The object passed to the command is not not a pandas DataFrame.\
         Please convert it to a pandas DataFrame.")

    # make sure that at least x, y, and idx are specified.
    if x is None and y is None and idx is None:
        raise ValueError(
            'You need to specify `x` and `y`, or `idx`. Neither has been specifed.'
        )

    if x is None:
        # if x is not specified, assume this is a 'wide' dataset, with each idx being the name of a column.
        datatype = 'wide'
        # Check that the idx are legit columns.
        all_idx = np.unique([element for tupl in idx for element in tupl])
        # # melt the data.
        # data=pd.melt(data,value_vars=all_idx)
        # x='variable'
        # y='value'
    else:
        # if x is specified, assume this is a 'long' dataset with each row corresponding to one datapoint.
        datatype = 'long'
        # make sure y is not none.
        if y is None:
            raise ValueError("`paired` is false, but no y-column given.")
        # Calculate Ns.
        counts = data.groupby(x)[y].count()

    # Get and set levels of data[x]
    if paired is True:
        violinWidth = 0.1
        # # Calculate Ns--which should be simply the number of rows in data.
        # counts=len(data)
        # is idcol supplied?
        if idcol is None and datatype == 'long':
            raise ValueError(
                '`idcol` has not been supplied but a paired plot is desired; please specify the `idcol`.'
            )
        if idx is not None:
            # check if multi-plot or not
            if all(isinstance(element, str) for element in idx):
                # check that every idx is a column name.
                idx_not_in_cols = [n for n in idx if n not in data[x].unique()]
                if len(idx_not_in_cols) != 0:
                    raise ValueError(
                        str(idx_not_in_cols) +
                        " cannot be found in the columns of `data`.")
                # data_wide_cols=[n for n in idx if n in data.columns]
                # if idx is supplied but not a multiplot (ie single list or tuple)
                if len(idx) != 2:
                    raise ValueError(idx + " does not have length 2.")
                else:
                    tuple_in = (tuple(idx, ), )
                widthratio = [1]
            elif all(isinstance(element, tuple) for element in idx):
                # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
                idx_not_in_cols = [
                    n for tup in idx for n in tup if n not in data[x].unique()
                ]
                if len(idx_not_in_cols) != 0:
                    raise ValueError(
                        str(idx_not_in_cols) +
                        " cannot be found in the column " + x)
                # data_wide_cols=[n for tup in idx for n in tup if n in data.columns]
                if (any(len(element) != 2 for element in idx)):
                    # If any of the tuples does not contain exactly 2 elements.
                    raise ValueError(element + " does not have length 2.")
                # Make sure the widthratio of the seperate multiplot corresponds to how
                # many groups there are in each one.
                tuple_in = idx
                widthratio = []
                for i in tuple_in:
                    widthratio.append(len(i))
        elif idx is None:
            raise ValueError('Please specify idx.')
        showRawData = False  # Just show lines, do not show data.
        showCI = False  # wait till I figure out how to plot this for sns.barplot.
        if datatype == 'long':
            if idx is None:
                ## If `idx` is not specified, just take the FIRST TWO levels alphabetically.
                tuple_in = tuple(np.sort(np.unique(data[x]))[0:2], )
            # pivot the dataframe if it is long!
            data_pivot = data.pivot_table(index=idcol, columns=x, values=y)

    elif paired is False:
        if idx is None:
            widthratio = [1]
            tuple_in = (tuple(data[x].unique()), )
            if len(tuple_in[0]) > 2:
                floatContrast = False
        else:
            if all(isinstance(element, str) for element in idx):
                # if idx is supplied but not a multiplot (ie single list or tuple)
                # check all every idx specified can be found in data[x]
                idx_not_in_x = [n for n in idx if n not in data[x].unique()]
                if len(idx_not_in_x) != 0:
                    raise ValueError(
                        str(idx_not_in_x) + " cannot be found in the column " +
                        x)
                tuple_in = (idx, )
                widthratio = [1]
                if len(idx) > 2:
                    floatContrast = False
            elif all(isinstance(element, tuple) for element in idx):
                # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
                idx_not_in_x = [
                    n for tup in idx for n in tup if n not in data[x].unique()
                ]
                if len(idx_not_in_x) != 0:
                    raise ValueError(
                        str(idx_not_in_x) + " cannot be found in the column " +
                        x)
                tuple_in = idx

                if (any(len(element) > 2 for element in tuple_in)):
                    # if any of the tuples in idx has more than 2 groups, we set floatContrast to False.
                    floatContrast = False
                # Make sure the widthratio of the seperate multiplot corresponds to how
                # many groups there are in each one.
                widthratio = []
                for i in tuple_in:
                    widthratio.append(len(i))
            else:
                raise TypeError(
                    "The object passed to `idx` consists of a mixture of single strings and tuples. \
                    Please make sure that `idx` is either a tuple of column names, or a tuple of tuples, for plotting."
                )

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar = True
        summaryLine = False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine = False
    # initialise statfunction
    if statfunction == None:
        statfunction = np.mean
    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol = kwargs['hue']
            if colorCol not in data.columns:
                raise ValueError(colorCol + ' is not a column name.')
            colGrps = data[colorCol].unique()  #.tolist()
            plotPal = dict(
                zip(colGrps, sns.color_palette(n_colors=len(colGrps))))
        else:
            if datatype == 'long':
                colGrps = data[x].unique()  #.tolist()
                plotPal = dict(
                    zip(colGrps, sns.color_palette(n_colors=len(colGrps))))
            if datatype == 'wide':
                plotPal = np.repeat('k', len(data))
    else:
        if datatype == 'long':
            plotPal = pal
        if datatype == 'wide':
            plotPal = list(map(lambda x: pal[x], data[hue]))

    if swarmYlim is None:
        # get range of _selected groups_.
        # u = list()
        # for t in tuple_in:
        #     for i in np.unique(t):
        #         u.append(i)
        # u = np.unique(u)
        u = np.unique([element for tupl in tuple_in for element in tupl])
        if datatype == 'long':
            tempdat = data[data[x].isin(u)]
            swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])])
        if datatype == 'wide':
            allMin = list()
            allMax = list()
            for col in u:
                allMin.append(np.min(data[col]))
                allMax.append(np.max(data[col]))
            swarm_ylim = np.array([np.min(allMin), np.max(allMax)])
        swarm_ylim = np.round(swarm_ylim)
    else:
        swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]])

    if summaryBar is True:
        lims = swarm_ylim
        # check that 0 lies within the desired limits.
        # if not, extend (upper or lower) limit to zero.
        if 0 not in range(int(round(lims[0])), int(round(
                lims[1]))):  # turn swarm_ylim to integer range.
            # check if all negative:.
            if lims[0] < 0. and lims[1] < 0.:
                swarm_ylim = np.array([np.min(lims), 0.])
            # check if all positive.
            elif lims[0] > 0. and lims[1] > 0.:
                swarm_ylim = np.array([0., np.max(lims)])

    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])

    # plot params
    if axis_title_size is None:
        axis_title_size = 27
    if yticksize is None:
        yticksize = 22
    if xticksize is None:
        xticksize = 22

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams = {'labelsize': axis_title_size}
    xtickParams = {'labelsize': xticksize}
    ytickParams = {'labelsize': yticksize}
    svgParams = {'fonttype': 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams)

    if figsize is None:
        if len(tuple_in) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (8, (8 / np.sqrt(2)))

    # calculate CI.
    if ci < 0 or ci > 100:
        raise ValueError('ci should be between 0 and 100.')
    alpha_level = (100. - ci) / 100.

    # Initialise figure, taking into account desired figsize.
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain = gridspec.GridSpec(
        1,
        np.shape(tuple_in)[0],
        # 1 row; columns based on number of tuples in tuple.
        width_ratios=widthratio,
        wspace=0)

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        if datatype == 'long':
            plotdat = data[data[x].isin(current_tuple)]
            plotdat[x] = plotdat[x].astype("category")
            plotdat[x].cat.set_categories(current_tuple,
                                          ordered=True,
                                          inplace=True)
            plotdat.sort_values(by=[x])
            # # Drop all nans.
            # plotdat.dropna(inplace=True)
            summaries = plotdat.groupby(x)[y].apply(statfunction)
        if datatype == 'wide':
            plotdat = data[list(current_tuple)]
            summaries = statfunction(plotdat)
            plotdat = pd.melt(plotdat)  ##### NOW I HAVE MELTED THE WIDE DATA.

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot.
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            # Create subGridSpec with 2 rows and 1 column.
            subGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx], wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig,
                                      subGridSpec[1, 0],
                                      sharex=ax_raw,
                                      frame_on=False)
        # Calculate the bootstrapped contrast
        bscontrast = list()
        if paired is False:
            tempplotdat = plotdat[[
                x, y
            ]]  # only select the columns used for x and y plotting.
            for i in range(1, len(current_tuple)):
                # Note that you start from one. No need to do auto-contrast!
                # if datatype=='long':aas
                tempbs = bootstrap_contrast(
                    data=tempplotdat.dropna(),
                    x=x,
                    y=y,
                    idx=[current_tuple[0], current_tuple[i]],
                    statfunction=statfunction,
                    smoothboot=smoothboot,
                    alpha_level=alpha_level,
                    reps=reps)
                bscontrast.append(tempbs)
                contrastList.append(tempbs)
                contrastListNames.append(current_tuple[i] + ' vs. ' +
                                         current_tuple[0])

        #### PLOT RAW DATA.
        ax_raw.set_ylim(swarm_ylim)
        # ax_raw.yaxis.set_major_locator(MaxNLocator(n_bins='auto'))
        # ax_raw.yaxis.set_major_locator(LinearLocator())
        if paired is False and showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            sw = sns.swarmplot(data=plotdat,
                               x=x,
                               y=y,
                               order=current_tuple,
                               ax=ax_raw,
                               alpha=alpha,
                               palette=plotPal,
                               size=rawMarkerSize,
                               marker=rawMarkerType,
                               **kwargs)

            if floatContrast:
                # Get horizontal offset values.
                maxXBefore = max(sw.collections[0].get_offsets().T[0])
                minXAfter = min(sw.collections[1].get_offsets().T[0])
                xposAfter = maxXBefore + floatSwarmSpacer
                xAfterShift = minXAfter - xposAfter
                # shift the (second) swarmplot
                offsetSwarmX(sw.collections[1], -xAfterShift)
                # shift the tick.
                ax_raw.set_xticks([0., 1 - xAfterShift])

        elif paired is True:
            if showRawData is True:
                sw = sns.swarmplot(data=plotdat,
                                   x=x,
                                   y=y,
                                   order=current_tuple,
                                   ax=ax_raw,
                                   alpha=alpha,
                                   palette=plotPal,
                                   size=rawMarkerSize,
                                   marker=rawMarkerType,
                                   **kwargs)
            if connectPairs is True:
                # Produce paired plot with lines.
                before = plotdat[plotdat[x] == current_tuple[0]][y].tolist()
                after = plotdat[plotdat[x] == current_tuple[1]][y].tolist()
                linedf = pd.DataFrame({'before': before, 'after': after})
                # to get color, need to loop thru each line and plot individually.
                for ii in range(0, len(linedf)):
                    ax_raw.plot(
                        [0, 0.25],
                        [linedf.loc[ii, 'before'], linedf.loc[ii, 'after']],
                        linestyle='solid',
                        linewidth=pairedDeltaLineWidth,
                        color=plotPal[current_tuple[0]],
                        alpha=pairedDeltaLineAlpha,
                    )
                ax_raw.set_xlim(-0.25, 0.5)
                ax_raw.set_xticks([0, 0.25])
                ax_raw.set_xticklabels([current_tuple[0], current_tuple[1]])

        # if swarmYlim is None:
        #     # if swarmYlim was not specified, tweak the y-axis
        #     # to show all the data without losing ticks and range.
        #     ## Get all yticks.
        #     axxYTicks=ax_raw.yaxis.get_majorticklocs()
        #     ## Get ytick interval.
        #     YTickInterval=axxYTicks[1]-axxYTicks[0]
        #     ## Get current ylim
        #     currentYlim=ax_raw.get_ylim()
        #     ## Extend ylim by adding a fifth of the tick interval as spacing at both ends.
        #     ax_raw.set_ylim(
        #         currentYlim[0]-(YTickInterval/5),
        #         currentYlim[1]+(YTickInterval/5)
        #         )
        #     ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto'))
        # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto'))
        # ax_raw.yaxis.set_major_locator(LinearLocator())

        if summaryBar is True:
            if paired is False:
                bar_raw = sns.barplot(x=summaries.index.tolist(),
                                      y=summaries.values,
                                      facecolor=summaryBarColor,
                                      ax=ax_raw,
                                      alpha=summaryBarAlpha)
                if floatContrast is True:
                    maxSwarmSpan = 2 / 10.
                    xlocs = list()
                    for i, bar in enumerate(bar_raw.patches):
                        x_width = bar.get_x()
                        width = bar.get_width()
                        centre = x_width + (width / 2.)
                        if i == 0:
                            bar.set_x(centre - maxSwarmSpan / 2.)
                            xlocs.append(centre)
                        else:
                            bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                            xlocs.append(centre - xAfterShift)
                        bar.set_width(maxSwarmSpan)
                    ax_raw.set_xticks(
                        xlocs)  # make sure xticklocs match the barplot.
                elif floatContrast is False:
                    maxSwarmSpan = 4 / 10.
                    xpos = ax_raw.xaxis.get_majorticklocs()
                    for i, bar in enumerate(bar_raw.patches):
                        bar.set_x(xpos[i] - maxSwarmSpan / 2.)
                        bar.set_width(maxSwarmSpan)
            else:
                # if paired is true
                ax_raw.bar([0, 0.25], [
                    statfunction(plotdat[current_tuple[0]]),
                    statfunction(plotdat[current_tuple[1]])
                ],
                           color=summaryBarColor,
                           alpha=0.5,
                           width=0.05)
                ## Draw zero reference line.
                ax_raw.add_artist(
                    Line2D((ax_raw.xaxis.get_view_interval()[0],
                            ax_raw.xaxis.get_view_interval()[1]), (0, 0),
                           color='k',
                           linewidth=1.25))

        if summaryLine is True:
            if paired is True:
                xdelta = 0
            else:
                xdelta = summaryLineWidth
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i - xdelta, i + xdelta),  # x-coordinates
                    (m, m),
                    color=summaryColour,
                    linestyle=summaryLineStyle)

        if showCI is True:
            sns.barplot(data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple) == 2:
            if paired is False:
                # Plot the CIs on the contrast axes.
                plotbootstrap(sw.collections[1],
                              bslist=tempbs,
                              ax=ax_contrast,
                              violinWidth=violinWidth,
                              violinOffset=violinOffset,
                              markersize=summaryMarkerSize,
                              marker=summaryMarkerType,
                              offset=floatContrast,
                              color=violinColor,
                              linewidth=1)
            else:
                bootsDelta = bootstrap(plotdat[current_tuple[1]] -
                                       plotdat[current_tuple[0]],
                                       statfunction=statfunction,
                                       smoothboot=smoothboot,
                                       alpha_level=alpha_level,
                                       reps=reps)
                contrastList.append(bootsDelta)
                contrastListNames.append(current_tuple[1] + ' vs. ' +
                                         current_tuple[0])
                summDelta = bootsDelta['summary']
                lowDelta = bootsDelta['bca_ci_low']
                highDelta = bootsDelta['bca_ci_high']

                if floatContrast:
                    xpos = 0.375
                else:
                    xpos = 0.25

                # Plot the summary measure.
                ax_contrast.plot(xpos,
                                 bootsDelta['summary'],
                                 marker=summaryMarkerType,
                                 markerfacecolor='k',
                                 markersize=summaryMarkerSize,
                                 alpha=0.75)
                # Plot the CI.
                ax_contrast.plot(
                    [xpos, xpos],
                    [lowDelta, highDelta],
                    color='k',
                    alpha=0.75,
                    # linewidth=1,
                    linestyle='solid')

                # Plot the violin-plot.
                v = ax_contrast.violinplot(bootsDelta['stat_array'], [xpos],
                                           widths=violinWidth,
                                           showextrema=False,
                                           showmeans=False)
                halfviolin(v, half='right', color='k')

            if floatContrast:
                # Set reference lines
                if paired is False:
                    ## First get leftmost limit of left reference group
                    xtemp, _ = np.array(sw.collections[0].get_offsets()).T
                    leftxlim = xtemp.min()
                    ## Then get leftmost limit of right test group
                    xtemp, _ = np.array(sw.collections[1].get_offsets()).T
                    rightxlim = xtemp.min()
                    ref = tempbs['summary']
                else:
                    leftxlim = 0
                    rightxlim = 0.25
                    ref = bootsDelta['summary']
                    ax_contrast.set_xlim(-0.25, 0.5)  # does this work?

                ## zero line
                ax_contrast.hlines(
                    0,  # y-coordinates
                    leftxlim,
                    3.5,  # x-coordinates, start and end.
                    linestyle=contrastZeroLineStyle,
                    linewidth=1,
                    color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(
                    ref,
                    rightxlim,
                    3.5,  # x-coordinates, start and end.
                    linestyle=contrastEffectSizeLineStyle,
                    linewidth=1,
                    color=contrastEffectSizeLineColor)

                if paired is False:
                    es = float(tempbs['summary'])
                    refSum = tempbs['statistic_ref']
                else:
                    es = float(bootsDelta['summary'])
                    refSum = statfunction(plotdat[current_tuple[0]])
                ## If the effect size is positive, shift the right axis up.
                if es > 0:
                    rightmin = ax_raw.get_ylim()[0] - es
                    rightmax = ax_raw.get_ylim()[1] - es
                ## If the effect size is negative, shift the right axis down.
                elif es < 0:
                    rightmin = ax_raw.get_ylim()[0] + es
                    rightmax = ax_raw.get_ylim()[1] + es
                ax_contrast.set_ylim(rightmin, rightmax)

                if gsIdx > 0:
                    ax_contrast.set_ylabel('')
                align_yaxis(ax_raw, refSum, ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)

                if paired is False:
                    # Set xlims so everything is properly visible!
                    swarm_xbounds = ax_raw.get_xbound()
                    ax_contrast.set_xbound(
                        swarm_xbounds[0] - (summaryLineWidth * 1.1),
                        swarm_xbounds[1] + (summaryLineWidth * 1.1))
                else:
                    ax_contrast.set_xlim(-0.05, 0.25 + violinWidth)

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(bslist=bscontrast,
                                   ax=ax_contrast,
                                   violinWidth=violinWidth,
                                   violinOffset=violinOffset,
                                   markersize=summaryMarkerSize,
                                   marker=summaryMarkerType,
                                   linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx > 0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Get number of axes in figure for aesthetic tweaks.
    axesCount = len(fig.get_axes())
    for i in range(0, axesCount, 2):
        # Set new tick labels.
        # The tick labels belong to the SWARM axes
        # for both floating and non-floating plots.
        # This is because `sharex` was invoked.
        axx = fig.axes[i]
        newticklabs = list()
        for xticklab in axx.xaxis.get_ticklabels():
            t = xticklab.get_text()
            if paired:
                N = str(counts)
            else:
                N = str(counts.ix[t])

            if showGroupCount:
                newticklabs.append(t + ' n=' + N)
            else:
                newticklabs.append(t)
            axx.set_xticklabels(newticklabs,
                                rotation=tickAngle,
                                horizontalalignment=tickAlignment)

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx = fig.axes[i]

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if i == 0:
            drawback_y(axx)

        if i != axesCount - 2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(False)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)
        else:
            drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(
                Line2D((axx.xaxis.get_view_interval()[0],
                        axx.xaxis.get_view_interval()[1]), (0, 0),
                       color='black',
                       linewidth=0.75))

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i == axesCount - 2:  # the last (rightmost) swarm axes.
                axx.legend(loc='top right',
                           bbox_to_anchor=(1.1, 1.0),
                           fontsize=legendFontSize,
                           **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j, i in enumerate(range(1, axesCount, 2)):
        axx = fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright = axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                       xmin=xleft - 1,
                       xmax=xright + 1,
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes, only is axesCount is 2.
                    # Not entirely sure why I have to do this.
                    if axesCount == 2:
                        drawback_y(axx)

            sns.despine(ax=axx,
                        top=True,
                        right=True,
                        left=False,
                        bottom=False,
                        trim=True)
            if j == 0 and axesCount == 2:
                # Draw back x-axis lines connecting ticks.
                drawback_x(axx)
            # Rotate tick labels.
            rotateTicks(axx, tickAngle, tickAlignment)

        elif floatContrast is True:
            if paired is True:
                # Get the bootstrapped contrast range.
                lower = np.min(contrastList.ix['stat_array', j])
                upper = np.max(contrastList.ix['stat_array', j])
            else:
                lower = np.min(contrastList.ix['diffarray', j])
                upper = np.max(contrastList.ix['diffarray', j])
            meandiff = contrastList.ix['summary', j]

            ## Make sure we have zero in the limits.
            if lower > 0:
                lower = 0.
            if upper < 0:
                upper = 0.

            ## Get the tick interval from the left y-axis.
            leftticks = fig.get_axes()[i - 1].get_yticks()
            tickstep = leftticks[1] - leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1 = axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = list()
            for a, b in enumerate(newticks1):
                if (b >= lower and b <= upper):
                    # if the tick lies within upper and lower, take it.
                    newticks2.append(b)
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2) < meandiff:
                ind = np.where(newticks1 == np.max(newticks2))[0][
                    0]  # find out the max tick index in newticks1.
                newticks2.append(newticks1[ind + 1])
            elif meandiff < np.min(newticks2):
                ind = np.where(newticks1 == np.min(newticks2))[0][
                    0]  # find out the min tick index in newticks1.
                newticks2.append(newticks1[ind - 1])
            newticks2 = np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine the axes.
            sns.despine(ax=axx,
                        trim=True,
                        bottom=False,
                        right=False,
                        left=True,
                        top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots.
    if (axesCount > 2 and contrastShareY is True and floatContrast is False):

        # Set contrast ylim as max ticks of leftmost swarm axes.
        if contrastYlim is None:
            lower = list()
            upper = list()
            for c in range(0, len(contrastList.columns)):
                lower.append(np.min(contrastList.ix['bca_ci_low', c]))
                upper.append(np.max(contrastList.ix['bca_ci_high', c]))
            lower = np.min(lower)
            upper = np.max(upper)
        else:
            lower = contrastYlim[0]
            upper = contrastYlim[1]

        normalizeContrastY(fig,
                           contrast_ylim=contrastYlim,
                           show_all_yaxes=showAllYAxes)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)

    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're all done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList
Esempio n. 20
0
    plt.ylim([0.4, 1.0])
    plt.xticks(np.arange(1, 10.5, step=1))
    plt.grid
    plt.savefig('FIGURES/accTrainVal_10foldCV_4classes_old.pdf')

# Row-normalise every fold's confusion matrix (each row sums to 1) and
# average the normalised matrices.
normalizedAvgCM = np.zeros((numClasses, numClasses))
for cm in confusionMatrices:
    # The row sums must be a column vector: dividing by the flat
    # ``cm.sum(axis=1)`` would broadcast along the columns and divide column j
    # by the total of row j instead of dividing each row by its own total.
    rowTotals = cm.sum(axis=1)[:, np.newaxis]
    normalizedAvgCM += cm.astype(float) / rowTotals

# NOTE(review): assumes nfold == len(confusionMatrices) -- confirm.
normalizedAvgCM = normalizedAvgCM / nfold

# Plot the averaged confusion matrix as an annotated heatmap.
df_cm = pd.DataFrame(normalizedAvgCM, index=classNames, columns=classNames)
plt.figure(figsize=(9.6, 4.1))  # 5.7
sns.set(font_scale=1.4)  # for label size
ax = sns.heatmap(
    df_cm,
    cbar_kws={'ticks': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]},
    vmin=0,
    vmax=1.0,
    annot=True,
    annot_kws={"size": 18},
    fmt='2.2f',
    cmap="Blues")  # font size
# The matrix is square, so the reversed x-limits are exactly the y-limits that
# show the full first and last rows. (An earlier +/-0.5 adjustment of the
# y-limits was immediately overridden by this call and has been dropped.)
ax.set_ylim(sorted(ax.get_xlim(), reverse=True))
ax.set_yticklabels(classNames, rotation=0, fontsize="16", va="center")
ax.set_xticklabels(classNames, rotation=0, fontsize="16", ha="center")
plt.tight_layout()
def perform_edge_imputation():
    accuracy_at_removed = []
    remove_probability = [0.1 * i for i in xrange(10)]

    constraints = {'edge_count': (1000, 1100)}

    samples = 2
    index = [
        'Watts Strogatz', 'Geometric', 'Erdos Renyi', 'Barabasi Albert',
        'Planted Partition Model'
    ]
    constraints_enforced = False
    rgs = [
        structural_identities.watts_strogatz_generator,
        structural_identities.geometric_generator,
        structural_identities.erdos_renyi_generator,
        structural_identities.barabasi_albert_generator,
        structural_identities.planted_partition_generator
    ]

    for p in remove_probability:
        correct = 0.0
        accuracy_at_k = [0] * 5
        confusion_matrix = [[0 for i in xrange(5)] for j in xrange(5)]

        for uni, rg in enumerate(rgs):
            title = index[uni]
            actual = uni
            for i in xrange(samples):
                G = structural_identities.constrained_generation(
                    rg, constraints)

                new_G = deepcopy(G)
                new_G = remove_edges(new_G, p)
                new_G = impute_edge_algorithm(new_G, G)

                cluster, types = predict_structure(new_G, 1,
                                                   constraints_enforced)

                predicted = cluster.index(min(cluster))
                print title, types[predicted]
                if actual == predicted:
                    correct += 1

                confusion_matrix[actual][predicted] += 1

                array = np.array(cluster)
                order = array.argsort()
                ranks = order.argsort().tolist()

                k = -1
                for i in xrange(len(cluster)):  # 5 types of rg
                    if title == types[ranks.index(i)]:
                        k = i
                        break

                j = len(cluster) - 1
                while j >= k:
                    accuracy_at_k[j] += 1
                    j -= 1
        small_index = ['WS', 'Geo', 'ER', 'BA', 'PPM']

        plt.figure(10)

        sns.set()
        ax = plt.axes()
        sns.heatmap(confusion_matrix,
                    ax=ax,
                    cmap="YlGnBu",
                    yticklabels=index,
                    xticklabels=small_index)
        ax.set_title('Confusion Matrix for Edge Imputed Graphs (' +
                     str((p) * 100) + ' percent removed)')
        plt.tight_layout()
        plt.savefig(
            '/Users/Brennan/Desktop/Networks/networks-project/pictures/CM_' +
            str((p) * 100) + '_removed.png')
        plt.close()

        sns.reset_defaults()
        imp.reload(mpl)
        imp.reload(plt)
        imp.reload(sns)
        # import matplotlib as mpl
        # import matplotlib.pyplot as plt

        for i in xrange(len(accuracy_at_k)):
            accuracy_at_k[i] /= (samples * 1.0 * len(rgs))

        if constraints_enforced:
            plt.plot([i for i in xrange(1, 6)],
                     accuracy_at_k,
                     marker='o',
                     color='red')
        else:
            plt.plot([i for i in xrange(1, 6)], accuracy_at_k, marker='o')

        plt.xlabel('k (top k labels)')
        plt.ylim((0, 1.1))
        plt.ylabel('Accuracy @ k')
        plt.title('Prediction Accuracy for Edge Imputed Graphs (' +
                  str((p) * 100) + ' percent removed)')
        plt.savefig(
            '/Users/Brennan/Desktop/Networks/networks-project/pictures/PA_' +
            str((p) * 100) + '_removed.png')
        plt.tight_layout()
        plt.close()

        accuracy_at_removed.append(correct / (len(rgs) * samples))

    plt.plot(remove_probability, accuracy_at_removed, marker='o')
    plt.xlabel('Percent of Edges Removed')
    plt.ylim((0, 1.1))
    plt.ylabel('Accuracy @ 1')
    plt.title('Prediction Accuracy for Graph Recovery (Edge Imputation)')
    plt.savefig(
        '/Users/Brennan/Desktop/Networks/networks-project/pictures/graph_imputation_forall_p.png'
    )
    plt.clf()
Esempio n. 22
0
''' 
Written by Sara Camnasio
[email protected]

'''

import numpy as np
import time
from matplotlib import pyplot as plt
import matplotlib.lines as mlines
import seaborn.apionly as sns
import pandas as pd
import seaborn as sns
from matplotlib.font_manager import FontProperties
sns.set(color_codes=True)

# The sample table is parsed twice from the same CSV: once as floats for the
# numeric columns and once as strings so the text columns survive.
_SAMPLE_TABLE = '/Users/saracamnasio/Dropbox/Research/Projects/UnusuallyRB/2016_Analysis/input/Final_sample.csv'
_READ_OPTIONS = dict(delimiter=',', skip_header=1)

source = np.genfromtxt(_SAMPLE_TABLE, dtype=float, **_READ_OPTIONS)
source1 = np.genfromtxt(_SAMPLE_TABLE, dtype=str, **_READ_OPTIONS)

# Short aliases for the columns used in the plots:
# first column of the string table, and column 18 of the numeric table.
names = source1[:, 0]
IP = source[:, 18]
Esempio n. 23
0
def contrastplot_test(
    data, x, y, idx=None, 
    
    alpha=0.75, 
    axis_title_size=None,

    barWidth=5,

    contrastShareY=True,
    contrastEffectSizeLineStyle='solid',
    contrastEffectSizeLineColor='black',
    contrastYlim=None,
    contrastZeroLineStyle='solid', 
    contrastZeroLineColor='black', 

    effectSizeYLabel="Effect Size", 

    figsize=None, 
    floatContrast=True,
    floatSwarmSpacer=0.2,

    heightRatio=(1, 1),

    idcol=None,

    lineWidth=2,
    legend=True,
    legendFontSize=14,
    legendFontProps={},

    paired=False,
    pal=None, 

    rawMarkerSize=8,
    rawMarkerType='o',
    reps=3000,
    
    showGroupCount=True,
    show95CI=False, 
    showAllYAxes=False,
    showRawData=True,
    smoothboot=False, 
    statfunction=None, 

    summaryBar=False, 
    summaryBarColor='grey',
    summaryBarAlpha=0.25,

    summaryColour='black', 
    summaryLine=True, 
    summaryLineStyle='solid', 
    summaryLineWidth=0.25, 

    summaryMarkerSize=10, 
    summaryMarkerType='o',

    swarmShareY=True, 
    swarmYlim=None, 

    tickAngle=45,
    tickAlignment='right',

    violinOffset=0.375,
    violinWidth=0.2, 
    violinColor='k',

    xticksize=None,
    yticksize=None,

    **kwargs):

    '''Takes a pandas dataframe and produces a contrast plot:
    either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot.
    -----------------------------------------------------------------------
    Description of flags upcoming.'''

    # Check that `data` is a pandas dataframe
    if 'DataFrame' not in str(type(data)):
        raise TypeError("The object passed to the command is not not a pandas DataFrame.\
         Please convert it to a pandas DataFrame.")

    # Get and set levels of data[x]    
    if idx is None:
        widthratio=[1]
        allgrps=np.sort(data[x].unique())
        if paired:
            # If `idx` is not specified, just take the FIRST TWO levels alphabetically.
            tuple_in=tuple(allgrps[0:2],)
        else:
            # No idx is given, so all groups are compared to the first one in the DataFrame column.
            tuple_in=(tuple(allgrps), )
            if len(allgrps)>2:
                floatContrast=False

    else:
        if all(isinstance(element, str) for element in idx):
            # if idx is supplied but not a multiplot (ie single list or tuple) 
            tuple_in=(idx, )
            widthratio=[1]
            if len(idx)>2:
                floatContrast=False
        elif all(isinstance(element, tuple) for element in idx):
            # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
            tuple_in=idx
            if ( any(len(element)>2 for element in tuple_in) ):
                # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False.
                floatContrast=False
            # Make sure the widthratio of the seperate multiplot corresponds to how 
            # many groups there are in each one.
            widthratio=[]
            for i in tuple_in:
                widthratio.append(len(i))
        else:
            raise TypeError("The object passed to `idx` consists of a mixture of single strings and tuples. \
                Please make sure that `idx` is either a tuple of column names, or a tuple of tuples for plotting.")

    # initialise statfunction
    if statfunction == None:
        statfunction=np.mean

    # Create list to collect all the contrast DataFrames generated.
    contrastList=list()
    contrastListNames=list()
    # # Calculate the bootstraps according to idx.
    # for ix, current_tuple in enumerate(tuple_in):
    #     bscontrast=list()
    #     for i in range (1, len(current_tuple)):
    #     # Note that you start from one. No need to do auto-contrast!
    #         tempbs=bootstrap_contrast(
    #             data=data,
    #             x=x,
    #             y=y,
    #             idx=[current_tuple[0], current_tuple[i]],
    #             statfunction=statfunction,
    #             smoothboot=smoothboot,
    #             reps=reps)
    #         bscontrast.append(tempbs)
    #         contrastList.append(tempbs)
    #         contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0])

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol=kwargs['hue']
            colGrps=data[colorCol].unique()
            nColors=len(colGrps)
        else:
            colorCol=x
            colGrps=data[x].unique()
            nColors=len([element for tupl in tuple_in for element in tupl])
        plotPal=dict( zip( colGrps, sns.color_palette(n_colors=nColors) ) )
    else:
        plotPal=pal

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar=True
        summaryLine=False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine=False

    if swarmYlim is None:
        # get range of _selected groups_.
        u = list()
        for t in idx:
            for i in np.unique(t):
                u.append(i)
        u = np.unique(u)
        tempdat=data[data[x].isin(u)]
        swarm_ylim=np.array([np.min(tempdat[y]), np.max(tempdat[y])])
    else:
        swarm_ylim=np.array([swarmYlim[0],swarmYlim[1]])

    if contrastYlim is not None:
        contrastYlim=np.array([contrastYlim[0],contrastYlim[1]])

    barWidth=barWidth/1000 # Not sure why have to reduce the barwidth by this much! 
    if showRawData is True:
        maxSwarmSpan=0.25
    else:
        maxSwarmSpan=barWidth

    # Expand the ylim in both directions.
    ## Find half of the range of swarm_ylim.
    swarmrange=swarm_ylim[1] -swarm_ylim[0]
    pad=0.1*swarmrange
    x2=np.array([swarm_ylim[0]-pad, swarm_ylim[1]+pad])
    swarm_ylim=x2

    # plot params
    if axis_title_size is None:
        axis_title_size=25
    if yticksize is None:
        yticksize=18
    if xticksize is None:
        xticksize=18

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams={'labelsize' : axis_title_size}
    xtickParams={'labelsize' : xticksize}
    ytickParams={'labelsize' : yticksize}
    svgParams={'fonttype' : 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams) 

    if figsize is None:
        if len(tuple_in)>2:
            figsize=(12,(12/np.sqrt(2)))
        else:
            figsize=(8,(8/np.sqrt(2)))
    
    # Initialise figure, taking into account desired figsize.
    fig=plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain=gridspec.GridSpec( 
        1, np.shape(tuple_in)[0], 
         # 1 row; columns based on number of tuples in tuple.
         width_ratios=widthratio,
         wspace=0 )

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        plotdat=data[data[x].isin(current_tuple)]
        plotdat[x]=plotdat[x].astype("category")
        plotdat[x].cat.set_categories(
            current_tuple,
            ordered=True,
            inplace=True)
        plotdat.sort_values(by=[x])
        # Drop all nans. 
        plotdat=plotdat.dropna()

        # Calculate summaries.
        summaries=plotdat.groupby([x],sort=True)[y].apply(statfunction)

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot
            ax_raw=fig.add_subplot(gsMain[gsIdx],
                frame_on=False)
            ax_contrast=ax_raw.twinx()
        else:
        # Create subGridSpec with 2 rows and 1 column.
            subGridSpec=gridspec.GridSpecFromSubplotSpec(2, 1,
                subplot_spec=gsMain[gsIdx],
                wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw=plt.Subplot(fig,
                subGridSpec[0, 0],
                frame_on=False)
            ax_contrast=plt.Subplot(fig,
                subGridSpec[1, 0],
                sharex=ax_raw,
                frame_on=False)
        # Calculate the boostrapped contrast
        bscontrast=list()
        for i in range (1, len(current_tuple)):
        # Note that you start from one. No need to do auto-contrast!
            tempbs=bootstrap_contrast(
                data=data,
                x=x,
                y=y,
                idx=[current_tuple[0], current_tuple[i]],
                statfunction=statfunction,
                smoothboot=smoothboot,
                reps=reps)
            bscontrast.append(tempbs)
            contrastList.append(tempbs)
            contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0])
        
        #### PLOT RAW DATA.
        if showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            ax_raw.set_ylim(swarm_ylim)
            sw=sns.swarmplot(
                data=plotdat, 
                x=x, y=y, 
                order=current_tuple, 
                ax=ax_raw, 
                alpha=alpha, 
                palette=plotPal,
                size=rawMarkerSize,
                marker=rawMarkerType,
                **kwargs)

        if summaryBar is True:
            bar_raw=sns.barplot(
                x=summaries.index.tolist(),
                y=summaries.values,
                facecolor=summaryBarColor,
                ax=ax_raw,
                alpha=summaryBarAlpha)
        
        if floatContrast:
            # Get horizontal offset values.
            maxXBefore=max(sw.collections[0].get_offsets().T[0])
            minXAfter=min(sw.collections[1].get_offsets().T[0])
            xposAfter=maxXBefore+floatSwarmSpacer
            xAfterShift=minXAfter-xposAfter
            # shift the swarmplots
            offsetSwarmX(sw.collections[1], -xAfterShift)

            ## get swarm with largest span, set as max width of each barplot.
            for i, bar in enumerate(bar_raw.patches):
                x_width=bar.get_x()
                width=bar.get_width()
                centre=x_width + (width/2.)
                if i == 0:
                    bar.set_x(centre-maxSwarmSpan/2.)
                else:
                    bar.set_x(centre-xAfterShift-maxSwarmSpan/2.)
                bar.set_width(maxSwarmSpan)

            ## Set the ticks locations for ax_raw.
            ax_raw.xaxis.set_ticks((0, xposAfter))
            firstTick=ax_raw.xaxis.get_ticklabels()[0].get_text()
            secondTick=ax_raw.xaxis.get_ticklabels()[1].get_text()
            ax_raw.set_xticklabels([firstTick,#+' n='+count[firstTick],
                                     secondTick],#+' n='+count[secondTick]],
                                   rotation=tickAngle,
                                   horizontalalignment=tickAlignment)

        if summaryLine is True:
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i -summaryLineWidth, 
                    i + summaryLineWidth), # x-coordinates
                    (m, m),
                    color=summaryColour, 
                    linestyle=summaryLineStyle)

        if show95CI is True:
                sns.barplot(
                    data=plotdat, 
                    x=x, y=y, 
                    ax=ax_raw, 
                    alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple)==2:
            # Plot the CIs on the contrast axes.
            plotbootstrap(sw.collections[1],
                          bslist=tempbs,
                          ax=ax_contrast, 
                          violinWidth=violinWidth,
                          violinOffset=violinOffset,
                          markersize=summaryMarkerSize,
                          marker=summaryMarkerType,
                          offset=floatContrast,
                          color=violinColor,
                          linewidth=1)
            if floatContrast:
                # Set reference lines
                ## First get leftmost limit of left reference group
                xtemp, _=np.array(sw.collections[0].get_offsets()).T
                leftxlim=xtemp.min()
                ## Then get leftmost limit of right test group
                xtemp, _=np.array(sw.collections[1].get_offsets()).T
                rightxlim=xtemp.min()

                ## zero line
                ax_contrast.hlines(0,                   # y-coordinates
                                leftxlim, 3.5,       # x-coordinates, start and end.
                                linestyle=contrastZeroLineStyle,
                                linewidth=0.75,
                                color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(tempbs['summary'], 
                                rightxlim, 3.5,        # x-coordinates, start and end.
                                linestyle=contrastEffectSizeLineStyle,
                                linewidth=0.75,
                                color=contrastEffectSizeLineColor)

                
                ## If the effect size is positive, shift the right axis up.
                if float(tempbs['summary'])>0:
                    rightmin=ax_raw.get_ylim()[0] -float(tempbs['summary'])
                    rightmax=ax_raw.get_ylim()[1] -float(tempbs['summary'])
                ## If the effect size is negative, shift the right axis down.
                elif float(tempbs['summary'])<0:
                    rightmin=ax_raw.get_ylim()[0] + float(tempbs['summary'])
                    rightmax=ax_raw.get_ylim()[1] + float(tempbs['summary'])

                ax_contrast.set_ylim(rightmin, rightmax)

                    
                if gsIdx>0:
                    ax_contrast.set_ylabel('')

                align_yaxis(ax_raw, tempbs['statistic_ref'], ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)
                
                # Set xlims so everything is properly visible!
                swarm_xbounds=ax_raw.get_xbound()
                ax_contrast.set_xbound(swarm_xbounds[0] -(summaryLineWidth * 1.1), 
                    swarm_xbounds[1] + (summaryLineWidth * 1.1))

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(
                bslist=bscontrast,
                ax=ax_contrast,
                violinWidth=violinWidth,
                violinOffset=violinOffset,
                markersize=summaryMarkerSize,
                marker=summaryMarkerType,
                linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx>0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList=pd.DataFrame(contrastList).T
    contrastList.columns=contrastListNames
    
    ########
    axesCount=len(fig.get_axes())

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx=fig.axes[i]

        if i!=axesCount-2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(showAllYAxes)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(Line2D(
                (axx.xaxis.get_view_interval()[0], 
                    axx.xaxis.get_view_interval()[1]), 
                (0,0),
                color='black', linewidth=0.75
                )
            )

        # I don't know why the swarm axes controls the contrast axes ticks....
        if showGroupCount:
            count=data.groupby(x).count()[y]
            newticks=list()
            for ix, t in enumerate(axx.xaxis.get_ticklabels()):
                t_text=t.get_text()
                nt=t_text+' n='+str(count[t_text])
                newticks.append(nt)
            axx.xaxis.set_ticklabels(newticks)

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i==axesCount-2: # the last (rightmost) swarm axes.
                axx.legend(loc='top right',
                    bbox_to_anchor=(1.1,1.0),
                    fontsize=legendFontSize,
                    **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j,i in enumerate(range(1, axesCount, 2)):
        axx=fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright=axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                xmin=xleft-1, 
                xmax=xright+1,
                linestyle=contrastZeroLineStyle,
                linewidth=0.75,
                color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)
            # # Draw back x-axis lines connecting ticks.
            # drawback_x(axx)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes.
                    # Not entirely sure why I have to do this.
                    drawback_y(axx)

            sns.despine(ax=axx, 
                top=True, right=True, 
                left=False, bottom=False, 
                trim=True)

            # Rotate tick labels.
            rotateTicks(axx,tickAngle,tickAlignment)

        else:
            # Re-draw the floating axis to the correct limits.
            lower=np.min(contrastList.ix['diffarray',j])
            upper=np.max(contrastList.ix['diffarray',j])
            meandiff=contrastList.ix['summary', j]

            ## Make sure we have zero in the limits.
            if lower>0:
                lower=0.
            if upper<0:
                upper=0.

            ## Get the tick interval from the left y-axis.
            leftticks=fig.get_axes()[i-1].get_yticks()
            tickstep=leftticks[1] -leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1=axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2=list()
            for a,b in enumerate(newticks1):
                if (b >= lower and b <= upper):
                    # if the tick lies within upper and lower, take it.
                    newticks2.append(b)
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2)<meandiff:
                ind=np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1.
                newticks2.append( newticks1[ind+1] )
            elif meandiff<np.min(newticks2):
                ind=np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1.
                newticks2.append( newticks1[ind-1] )
            newticks2=np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))
            
            ## Despine the axes.
            sns.despine(ax=axx, trim=True, 
                bottom=False, right=False,
                left=True, top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots.
    if (axesCount>2 and 
        contrastShareY is True and 
        floatContrast is False):

        # Set contrast ylim as max ticks of leftmost swarm axes.
        if contrastYlim is None:
            lower=list()
            upper=list()
            for c in range(0,len(contrastList.columns)):
                lower.append( np.min(contrastList.ix['bca_ci_low',c]) )
                upper.append( np.max(contrastList.ix['bca_ci_high',c]) )
            lower=np.min(lower)
            upper=np.max(upper)
        else:
            lower=contrastYlim[0]
            upper=contrastYlim[1]

        normalizeContrastY(fig, 
            contrast_ylim = contrastYlim, 
            show_all_yaxes = showAllYAxes)

    # if (axesCount==2 and 
    #     floatContrast is False):
    #     drawback_x(fig.get_axes()[1])
    #     drawback_y(fig.get_axes()[1])

    # if swarmShareY is False:
    #     for i in range(0, axesCount, 2):
    #         drawback_y(fig.get_axes()[i])
                       
    # if contrastShareY is False:
    #     for i in range(1, axesCount, 2):
    #         if floatContrast is True:
    #             sns.despine(ax=fig.get_axes()[i], 
    #                        top=True, right=False, left=True, bottom=True, 
    #                        trim=True)
    #         else:
    #             sns.despine(ax=fig.get_axes()[i], trim=True)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)

    else:    
        # Tight Layout!
        gsMain.tight_layout(fig)
    
    # And we're all done.
    rcdefaults() # restore matplotlib defaults.
    sns.set() # restore seaborn defaults.
    return fig, contrastList