Ejemplo n.º 1
0
def do_surveys():
    """Draw the four questionnaire figures (NASA-TLX and survey results).

    Every figure is produced inside the ``figure(...)`` context manager
    (defined outside this function) and reads the module-level ``tlx`` and
    ``surveys`` data frames as well as the ``cmap_complement`` palette.

    Figures drawn, in order:

    * ``tlx_results``       -- box plot plus swarm of the ``tlx`` column per
      experiment.
    * ``tlx_components``    -- bar plot of the six NASA-TLX component scores
      per experiment.
    * ``survey_results``    -- box plot plus swarm of the ``total`` column per
      experiment.
    * ``survey_components`` -- horizontal bar plot plus strip plot of the six
      survey question ratings per experiment.
    """
    with figure("tlx_results", figsize=fig_size(0.44, 1)):
        sns.factorplot(x="experiment", y="tlx", data=tlx, kind="box")
        sns.swarmplot(x="experiment", y=r"tlx",
                      data=tlx, palette=cmap_complement, split=True)
        # Anchor the y-axis at zero while keeping the automatic upper limit.
        plt.ylim(0, plt.ylim()[1])
        plt.ylabel("NASA-TLX weighted score")

    with figure("tlx_components", figsize=fig_size(0.44, 1)):
        components = ["mental", "physical", "temporal", "performance",
                      "effort", "frustration"]
        # Long format: one row per (user, experiment, order, component).
        molten = pd.melt(tlx, id_vars=["user", "experiment", "order"],
                         value_vars=components,
                         var_name="component", value_name="score")
        sns.barplot(x=r"component", y="score", hue="experiment",
                    data=molten)

        # Abbreviated tick labels, in the same order as `components`.
        plt.gca().set_xticklabels(
                ["MD", "PD", "TD", "P", "E", "F"])

        plt.xlabel("NASA-TLX component")
        plt.ylabel("score")

    # The size is passed as `figsize=` like every other `figure(...)` call,
    # instead of relying on the position of the second parameter.
    with figure("survey_results", figsize=fig_size(0.44, 1)):
        sns.factorplot(x="experiment", y="total", data=surveys, kind="box")
        sns.swarmplot(x="experiment", y=r"total", data=surveys,
                      palette=cmap_complement, split=True)
        plt.ylim(0, plt.ylim()[1])
        plt.ylabel("survey score")

    with figure("survey_components", figsize=fig_size(0.9, 0.5)):
        molten = pd.melt(surveys, id_vars=["user", "experiment", "order"],
                         value_vars=[r"orientation_understanding",
                                     r"orientation_control",
                                     r"position_understanding",
                                     r"position_control",
                                     r"spacial_understanding",
                                     r"spacial_control"],
                         var_name="question", value_name="rating")
        g = sns.barplot(x=r"rating", y=r"question", hue="experiment",
                        data=molten)
        sns.stripplot(x="rating", y=r"question", data=molten, hue="experiment",
                      split=True, palette=cmap_complement, jitter=0.6, size=3)

        # Readable tick labels, in the same order as the melted questions.
        plt.gca().set_yticklabels(
                ["angle aware", "angle control",
                 "position aware", "position control",
                 "rel. pos. aware", "rel. pos. control"])

        # Both plots use `hue`, so the legend would list the experiments
        # twice; the first two handles are dropped.
        # NOTE(review): assumes exactly two experiment levels -- confirm.
        handles, labels = g.get_legend_handles_labels()
        plt.legend(handles[2:], labels[2:])
        plt.xlabel("rating")
        plt.title("Survey results")
Ejemplo n.º 2
0
def do_durations():
    """Draw the task-duration figures from the module-level ``analyses`` data.

    * ``duration``      -- box plot with an overlaid swarm of ``duration`` per
      experiment.
    * ``duration_runs`` -- point plot of ``duration`` against run ``order``,
      one hue per experiment.
    """
    duration_label = "duration (s)"

    def _anchor_y_at_zero():
        # Keep the automatically chosen upper limit, force the lower one to 0.
        _, upper = plt.ylim()
        plt.ylim(0, upper)

    with figure("duration", figsize=fig_size(0.44, 1)):
        sns.factorplot(data=analyses, x="experiment", y="duration",
                       kind="box")
        sns.swarmplot(data=analyses, x="experiment", y="duration",
                      palette=cmap_complement, split=True)
        _anchor_y_at_zero()
        plt.ylabel(duration_label)

    with figure("duration_runs", figsize=fig_size(0.44, 1)):
        sns.factorplot(data=analyses, x="order", y="duration",
                       hue="experiment", capsize=0.2)
        _anchor_y_at_zero()
        plt.ylabel(duration_label)
        plt.xlabel("run")
Ejemplo n.º 3
0
def registration_qc(
    df,
    anova_type=3,
    cmap="Set3",
    extra=False,
    extra_cmap=EXTRA_COLORSET,
    group={"sub": "Subject"},
    model="{value} ~ C({extra}) + C({group}) + C({repeat}) -1",
    print_model=False,
    print_anova=False,
    repeat={"ses": "Session"},
    samri_style=True,
    save_as=False,
    show=True,
    value={"similarity": "Similarity"},
    values_rename={},
):
    """Aggregate plot of similarity metrics for registration quality control.

    Fits an OLS model to the similarity scores, computes its ANOVA table and
    draws a swarm plot of the scores.

    Parameters
    ----------

    df : pandas.DataFrame or str
        Pandas Dataframe or CSV file containing similarity scores. A
        DataFrame passed in is not modified; renaming is done on a copy.
    anova_type : int, optional
        Type of the ANOVA to use for model analysis. Consult [1]_ for a theoretical overview, and `statsmodels.stats.anova.anova_lm` for the implementation we use.
    cmap : str or list, optional
        If a string, the variable specifies the matplotlib colormap [2]_ (qualitative colormaps are recommended) to use for `repeat` highlighting. If a List, the variable should be a list of colors (e.g. `["#00FF00","#2222FF"]`).
    extra : str or dict or bool, optional
        Column of `df` to use as an additional factor. When truthy, a second swarm plot with larger markers, coloured by this column according to `extra_cmap`, is drawn underneath the `repeat`-coloured one. If a dictionary is passed, the column named for the key of the dictionary is renamed to the value, and the value name is then used as the extra factor. Note that the value is substituted into `model` as-is, also when it is `False`.
    extra_cmap : str or list, optional
        If a string, the variable specifies the matplotlib colormap [2]_ (qualitative colormaps are recommended) to use for `extra` highlighting, which is applied as a contour to the `repeat`-colored patches. If a List, the variable should be a list of colors (e.g. `["#00FF00","#2222FF"]`).
    group : str or dict, optional
        Column of `df` to use as the group factor (values of this factor will represent the x-axis). If a dictionary is passed, the column named for the key of the dictionary is renamed to the value, and the value name is then used as the group factor. This is useful for the input of longer but clearer names for plotting.
    model : string, optional
        A string specifying the ANOVA formula as a statsmodels function [3]_. It may contain string substitutions (e.g. `"{value} ~ C({group})"`).
    print_model : bool, optional
        Whether to print the model output table.
    print_anova : bool, optional
        Whether to print the ANOVA output table.
    samri_style : bool, optional
        Whether to apply a generic SAMRI style to the plot.
    save_as : str, optional
        Path under which to save the generated plot (format is interpreted from provided extension).
    show : bool, optional
        Whether to show the plot in an interactive window.
    repeat : str or dict, optional
        Column of `df` to use as the repeat factor (values of this factor will be represented via different hues, according to `cmap`). If a dictionary is passed, the column named for the key of the dictionary is renamed to the value, and the value name is then used as the repeat factor. This is useful for the input of longer but clearer names for plotting.
    value : str or dict, optional
        Column of `df` to use as the value (this variable will be represented on the y-axis). If a dictionary is passed, the column named for the key of the dictionary is renamed to the value, and the value name is then used as the value column. This is useful for the input of longer but clearer names for plotting.
    values_rename : dict, optional
        Dictionary used to rename values in `df`. This is useful for the input of longer but clearer names for plotting (this parameter will not rename column names, for renaming those, see parameters `extra`, `group`, `repeat`, and `value`).

    Returns
    -------
    pandas.DataFrame
        ANOVA summary table in DataFrame format.

    References
    ----------
    .. [1] http://goanna.cs.rmit.edu.au/~fscholer/anova.php

    .. [2] https://matplotlib.org/examples/color/colormaps_reference.html

    .. [3] http://www.statsmodels.org/dev/example_formulas.html
    """
    import seaborn.apionly as sns
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    if samri_style:
        this_path = path.dirname(path.realpath(__file__))
        plt.style.use(path.join(this_path, "samri.conf"))

    # Python 2 has `basestring`; on Python 3 the name does not exist.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(df, string_types):
        df = pd.read_csv(path.abspath(path.expanduser(df)))

    # Rename values one key at a time. `replace` returns a new frame, so a
    # DataFrame handed in by the caller is left untouched.
    for old_value, new_value in values_rename.items():
        df = df.replace(to_replace=old_value, value=new_value)

    # Dictionary-style factors request a column rename; afterwards only the
    # new (display) name is used. The default dicts are only read, never
    # mutated.
    column_renames = {}
    if isinstance(value, dict):
        column_renames.update(value)
        value = list(value.values())[0]
    if isinstance(group, dict):
        column_renames.update(group)
        group = list(group.values())[0]
    if isinstance(repeat, dict):
        column_renames.update(repeat)
        repeat = list(repeat.values())[0]
    if isinstance(extra, dict):
        column_renames.update(extra)
        extra = list(extra.values())[0]
    df = df.rename(columns=column_renames)

    model = model.format(value=value, group=group, repeat=repeat, extra=extra)
    regression_model = smf.ols(model, data=df).fit()
    if print_model:
        print(regression_model.summary())

    anova_summary = sm.stats.anova_lm(regression_model, typ=anova_type)
    if print_anova:
        print(anova_summary)

    if extra:
        # Larger markers drawn first, so that they appear as a contour around
        # the `repeat`-coloured markers drawn on top.
        sns.swarmplot(
            x=group,
            y=value,
            hue=extra,
            data=df,
            size=rcParams["lines.markersize"] * 1.4,
            palette=sns.color_palette(extra_cmap),
        )
    sns.swarmplot(
        x=group,
        y=value,
        hue=repeat,
        data=df,
        edgecolor=(1, 1, 1, 0.0),
        linewidth=rcParams["lines.markersize"] * .4,
        palette=sns.color_palette(cmap),
    )

    plt.legend(loc=rcParams["legend.loc"])

    # Save before showing: once a blocking `plt.show()` returns, the figure
    # may already have been closed and `savefig` would write an empty canvas.
    if save_as:
        plt.savefig(path.abspath(path.expanduser(save_as)),
                    bbox_inches='tight')
    if show:
        plt.show()

    return anova_summary
Ejemplo n.º 4
0
def _pairedcontrast_ticks(candidate_ticks, lower, upper, meandiff):
    """Select the contrast-axis ticks that encompass the plotted statistics.

    Every tick of `candidate_ticks` lying within ``[lower, upper]`` is kept.
    If `meandiff` lies beyond the kept ticks, the neighbouring candidate tick
    on that side is added as well. Returns the ticks as a sorted numpy array.
    """
    ticks = [tick for tick in candidate_ticks if lower <= tick <= upper]
    if np.max(ticks) < meandiff:
        # Position of the largest kept tick among the candidates.
        ind = np.where(candidate_ticks == np.max(ticks))[0][0]
        ticks.append(candidate_ticks[ind + 1])
    elif meandiff < np.min(ticks):
        # Position of the smallest kept tick among the candidates.
        ind = np.where(candidate_ticks == np.min(ticks))[0][0]
        ticks.append(candidate_ticks[ind - 1])
    ticks = np.array(ticks)
    ticks.sort()
    return ticks


def pairedcontrast(data,
                   x,
                   y,
                   idcol,
                   reps=3000,
                   statfunction=None,
                   idx=None,
                   figsize=None,
                   beforeAfterSpacer=0.01,
                   violinWidth=0.005,
                   floatOffset=0.05,
                   showRawData=False,
                   showAllYAxes=False,
                   floatContrast=True,
                   smoothboot=False,
                   floatViolinOffset=None,
                   showConnections=True,
                   summaryBar=False,
                   contrastYlim=None,
                   swarmYlim=None,
                   barWidth=0.005,
                   rawMarkerSize=8,
                   rawMarkerType='o',
                   summaryMarkerSize=10,
                   summaryMarkerType='o',
                   summaryBarColor='grey',
                   meansSummaryLineStyle='solid',
                   contrastZeroLineStyle='solid',
                   contrastEffectSizeLineStyle='solid',
                   contrastZeroLineColor='black',
                   contrastEffectSizeLineColor='black',
                   pal=None,
                   legendLoc=2,
                   legendFontSize=12,
                   legendMarkerScale=1,
                   axis_title_size=None,
                   yticksize=None,
                   xticksize=None,
                   tickAngle=45,
                   tickAlignment='right',
                   **kwargs):
    """Plot paired (before/after) data with a bootstrapped difference.

    For every ``(before, after)`` pair of levels in `idx`, the raw values are
    drawn per level, the two points of each `idcol` entry are optionally
    joined with a line, and the paired differences ``after - before`` are
    bootstrapped with `bootstrap`. The summary difference, its BCa confidence
    interval and a half-violin of the bootstrap distribution are drawn on a
    contrast axes (floating next to the raw data, or in a separate row below
    it). A one-sample t-test of the differences against 0 is computed too.

    Rows of `data` containing NaN are dropped first. The function changes
    matplotlib rc parameters while plotting and, before returning, calls
    ``rcdefaults()`` and ``sns.set()``, i.e. it resets global matplotlib and
    seaborn styling.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format data.
    x : str
        Column of `data` holding the group levels.
    y : str
        Column of `data` holding the measured values.
    idcol : str
        Column identifying which observations belong together as a pair.
    reps : int, optional
        Passed on to `bootstrap`.
    statfunction : callable, optional
        Summary statistic; defaults to ``numpy.mean``.
    idx : tuple, optional
        Either one pair of level names ``("before", "after")`` or a tuple of
        such pairs (one sub-plot per pair). If None, the first two levels of
        ``numpy.unique(data[x])`` form the single pair.
    figsize : tuple, optional
        Figure size; defaults to ``(6, 6)``, or ``(12, 12 / sqrt(2))`` when
        more than two pairs are given.
    beforeAfterSpacer : float, optional
        Horizontal gap between the 'before' and 'after' points; replaced by 1
        when `showRawData` is True.
    violinWidth : float, optional
        Width of the bootstrap violin; replaced by 0.5 when `showRawData` is
        True.
    showRawData : bool, optional
        If True draw (and keep visible) a swarm plot of the raw data,
        otherwise use a hidden strip plot only to position the points.
    showAllYAxes : bool, optional
        With ``floatContrast=False``, whether the contrast y-axes of all but
        the first sub-plot stay visible.
    floatContrast : bool, optional
        If True the contrast is drawn on a twin axes beside the raw data,
        otherwise on a separate axes below it.
    smoothboot : bool, optional
        Passed on to `bootstrap`.
    floatViolinOffset : float, optional
        Offset of the floating violin from the right-most 'after' point;
        defaults to ``beforeAfterSpacer / 2``.
    showConnections : bool, optional
        Whether to join each pair of points with a line.
    summaryBar : bool, optional
        Whether to draw a bar of the per-level means behind the raw data.
    contrastYlim, swarmYlim : sequence of two floats, optional
        y-limits for the contrast axes and the raw-data axes.
    barWidth : float, optional
        Width of the summary bars when `showRawData` is False.
    rawMarkerSize, rawMarkerType : optional
        Marker size and type of the swarm plot (``showRawData=True`` only).
    summaryMarkerSize : float, optional
        Marker size of the summary difference.
    summaryBarColor : color, optional
        Face colour of the summary bars.
    contrastZeroLineStyle, contrastZeroLineColor : optional
        Style and colour of the zero line (``floatContrast=False`` only).
    pal : dict, optional
        Palette handed to seaborn; by default one colour per unique value of
        the hue column (or of `x` if no ``hue`` keyword is given).
    legendLoc, legendFontSize, legendMarkerScale : optional
        Legend settings, used only when a ``hue`` keyword is given.
    axis_title_size, yticksize, xticksize : float, optional
        Label sizes; default to 15, 12 and 12.
    tickAngle, tickAlignment : optional
        Rotation and horizontal alignment of the x tick labels
        (``floatContrast=True`` only).
    floatOffset, summaryMarkerType, meansSummaryLineStyle,
    contrastEffectSizeLineStyle, contrastEffectSizeLineColor : optional
        Accepted for interface compatibility; not used by this function.
    **kwargs
        Passed on to ``sns.swarmplot`` / ``sns.stripplot``. If neither
        ``color`` nor ``hue`` is given, ``color='k'`` is used.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure that was drawn.
    contrastList : pandas.DataFrame
        One column per contrast, named ``"<after> v.s. <before>"``, holding
        the entries returned by `bootstrap` plus ``ttest_pval``.

    Raises
    ------
    ValueError
        If `idx` (or one of the tuples in it) does not have length 2.
    """
    # Preliminaries.
    data = data.dropna()

    # plot params
    if axis_title_size is None:
        axis_title_size = 15
    if yticksize is None:
        yticksize = 12
    if xticksize is None:
        xticksize = 12

    rc('axes', labelsize=axis_title_size)
    rc('xtick', labelsize=xticksize)
    rc('ytick', labelsize=yticksize)

    ## If `idx` is not specified, just take the FIRST TWO levels alphabetically.
    if idx is None:
        # Wrapped in an outer tuple: the plotting loop below iterates over
        # (before, after) pairs, not over individual levels.
        idx = (tuple(np.unique(data[x])[0:2]), )
    elif all(isinstance(element, str) for element in idx):
        # idx is supplied but not a multiplot (ie single list or tuple)
        if len(idx) != 2:
            raise ValueError("{} does not have length 2.".format(idx))
        idx = (tuple(idx), )
    elif all(isinstance(element, tuple) for element in idx):
        # idx is a list/tuple of tuples: we have a multiplot!
        for element in idx:
            if len(element) != 2:
                raise ValueError(
                    "{} does not have length 2.".format(element))

    if floatViolinOffset is None:
        floatViolinOffset = beforeAfterSpacer / 2
    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])
    if swarmYlim is not None:
        swarmYlim = np.array([swarmYlim[0], swarmYlim[1]])

    ## Here we define the palette on all the levels of the 'x' column.
    ## Thus, if the same pandas dataframe is re-used across different plots,
    ## the color identity of each group will be maintained.
    ## Set palette based on total number of categories in data['x'] or data['hue_column']
    if 'hue' in kwargs:
        u = kwargs['hue']
    else:
        u = x
    if ('color' not in kwargs and 'hue' not in kwargs):
        kwargs['color'] = 'k'

    if pal is None:
        pal = dict(
            zip(data[u].unique(),
                sns.color_palette(n_colors=len(data[u].unique()))))

    # Initialise figure.
    if figsize is None:
        if len(idx) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (6, 6)
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec based on `idx` shape.
    gsMain = gridspec.GridSpec(
        1,
        np.shape(idx)[0])  # 1 row; columns based on number of tuples in tuple.
    # Set default statfunction
    if statfunction is None:
        statfunction = np.mean
    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()

    ## Pivot the data to get before and after values per `idcol` entry.
    ## This does not depend on the pair being plotted, so do it once.
    data_pivot = data.pivot_table(index=idcol, columns=x, values=y)

    for gsIdx, xlevs in enumerate(idx):
        # Start plotting!!
        if floatContrast is True:
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            gsSubGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx])
            ax_raw = plt.Subplot(fig, gsSubGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig,
                                      gsSubGridSpec[1, 0],
                                      sharex=ax_raw,
                                      frame_on=False)

        ## Plot raw data as swarmplot or stripplot.
        if showRawData is True:
            swarm_raw = sns.swarmplot(data=data,
                                      x=x,
                                      y=y,
                                      order=xlevs,
                                      ax=ax_raw,
                                      palette=pal,
                                      size=rawMarkerSize,
                                      marker=rawMarkerType,
                                      **kwargs)
        else:
            swarm_raw = sns.stripplot(data=data,
                                      x=x,
                                      y=y,
                                      order=xlevs,
                                      ax=ax_raw,
                                      palette=pal,
                                      **kwargs)
        swarm_raw.set_ylim(swarmYlim)

        ## Get some details about the raw data.
        maxXBefore = max(swarm_raw.collections[0].get_offsets().T[0])
        minXAfter = min(swarm_raw.collections[1].get_offsets().T[0])
        if showRawData is True:
            beforeAfterSpacer = 1
        xposAfter = maxXBefore + beforeAfterSpacer
        xAfterShift = minXAfter - xposAfter

        ## shift the after swarmpoints closer for aesthetic purposes.
        offsetSwarmX(swarm_raw.collections[1], -xAfterShift)

        ### create convenient signifiers for column names.
        befX = str(xlevs[0] + '_x')
        aftX = str(xlevs[1] + '_x')

        ## pandas DataFrame of 'before' group
        before_offsets = swarm_raw.collections[0].get_offsets().T
        before_colors = swarm_raw.collections[0].get_facecolors().T
        x1 = pd.DataFrame({
            befX: pd.Series(before_offsets[0]),
            xlevs[0]: pd.Series(before_offsets[1]),
            '_R_': pd.Series(before_colors[0]),
            '_G_': pd.Series(before_colors[1]),
            '_B_': pd.Series(before_colors[2]),
        })
        ## join the RGB columns into a tuple, then assign to a column.
        x1['_hue_'] = x1[['_R_', '_G_', '_B_']].apply(tuple, axis=1)
        ## Points are matched to `idcol` entries by sorting both on the value.
        x1 = x1.sort_values(by=xlevs[0])
        x1.index = data_pivot.sort_values(by=xlevs[0]).index

        ## pandas DataFrame of 'after' group
        after_offsets = swarm_raw.collections[1].get_offsets().T
        x2 = pd.DataFrame({
            aftX: pd.Series(after_offsets[0]),
            xlevs[1]: pd.Series(after_offsets[1])
        })
        x2 = x2.sort_values(by=xlevs[1])
        x2.index = data_pivot.sort_values(by=xlevs[1]).index

        ## Join x1 and x2, on both their indexes.
        plotPoints = x1.merge(x2,
                              left_index=True,
                              right_index=True,
                              how='outer')

        ## Add the hue column if hue argument was passed.
        if 'hue' in kwargs:
            h = kwargs['hue']
            plotPoints[h] = data.pivot(index=idcol, columns=x,
                                       values=h)[xlevs[0]]
            swarm_raw.legend(loc=legendLoc,
                             fontsize=legendFontSize,
                             markerscale=legendMarkerScale)

        ## Plot the lines to join the 'before' points to their respective 'after' points.
        if showConnections is True:
            for i in plotPoints.index:
                ax_raw.plot(
                    [plotPoints.loc[i, befX], plotPoints.loc[i, aftX]],
                    [plotPoints.loc[i, xlevs[0]],
                     plotPoints.loc[i, xlevs[1]]],
                    linestyle='solid',
                    color=plotPoints.loc[i, '_hue_'],
                    linewidth=0.75,
                    alpha=0.75)

        ## Hide the raw swarmplot data if so desired.
        if showRawData is False:
            swarm_raw.collections[0].set_visible(False)
            swarm_raw.collections[1].set_visible(False)

        if showRawData is True:
            maxSwarmSpan = 0.5
        else:
            maxSwarmSpan = barWidth

        ## Plot Summary Bar.
        if summaryBar is True:
            # Calculate means of the value column only.
            means = data.groupby([x], sort=True)[y].mean()

            ## Draw summary bar.
            bar_raw = sns.barplot(x=means.index,
                                  y=means.values,
                                  order=xlevs,
                                  ax=ax_raw,
                                  ci=0,
                                  facecolor=summaryBarColor,
                                  alpha=0.25)
            ## Draw zero reference line.
            ax_raw.add_artist(
                Line2D((ax_raw.xaxis.get_view_interval()[0],
                        ax_raw.xaxis.get_view_interval()[1]), (0, 0),
                       color='black',
                       linewidth=0.75))

            ## Set the width of each bar, and move the 'after' bar along
            ## with the shifted 'after' points.
            for i, bar in enumerate(bar_raw.patches):
                centre = bar.get_x() + bar.get_width() / 2.
                if i == 0:
                    bar.set_x(centre - maxSwarmSpan / 2.)
                else:
                    bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                bar.set_width(maxSwarmSpan)

        # Get the extents of the plotted swarm points.
        beforeRaw = pd.DataFrame(swarm_raw.collections[0].get_offsets())
        afterRaw = pd.DataFrame(swarm_raw.collections[1].get_offsets())
        before_leftx = min(beforeRaw[0])
        after_rightx = max(afterRaw[0])
        # Summary of the 'before' values; the zero of the contrast axes is
        # aligned with it further down.
        before_stat_summary = statfunction(beforeRaw[1])

        # Calculate the summary difference and CI.
        plotPoints['delta_y'] = plotPoints[xlevs[1]] - plotPoints[xlevs[0]]
        plotPoints['delta_x'] = [0] * np.shape(plotPoints)[0]

        bootsDelta = bootstrap(plotPoints['delta_y'],
                               statfunction=statfunction,
                               smoothboot=smoothboot,
                               reps=reps)
        summDelta = bootsDelta['summary']
        lowDelta = bootsDelta['bca_ci_low']
        highDelta = bootsDelta['bca_ci_high']

        # set new xpos for delta violin.
        if floatContrast is True:
            if showRawData is False:
                xposPlusViolin = deltaSwarmX = after_rightx + floatViolinOffset
            else:
                xposPlusViolin = deltaSwarmX = after_rightx + maxSwarmSpan
        else:
            xposPlusViolin = xposAfter
        if showRawData is True:
            # If showRawData is True, set violinwidth to the barwidth.
            violinWidth = maxSwarmSpan

        xmaxPlot = xposPlusViolin + violinWidth

        # Plot the summary measure.
        ax_contrast.plot(xposPlusViolin,
                         summDelta,
                         marker='o',
                         markerfacecolor='k',
                         markersize=summaryMarkerSize,
                         alpha=0.75)

        # Plot the CI.
        ax_contrast.plot([xposPlusViolin, xposPlusViolin],
                         [lowDelta, highDelta],
                         color='k',
                         alpha=0.75,
                         linestyle='solid')

        # Plot the violin-plot.
        v = ax_contrast.violinplot(bootsDelta['stat_array'], [xposPlusViolin],
                                   widths=violinWidth,
                                   showextrema=False,
                                   showmeans=False)
        halfviolin(v, half='right', color='k')

        # Remove left axes x-axis title.
        ax_raw.set_xlabel("")
        # Remove floating axes y-axis title.
        ax_contrast.set_ylabel("")

        # Set proper x-limits
        ax_raw.set_xlim(before_leftx - beforeAfterSpacer / 2, xmaxPlot)
        ax_raw.get_xaxis().set_view_interval(
            before_leftx - beforeAfterSpacer / 2,
            after_rightx + beforeAfterSpacer / 2)
        ax_contrast.set_xlim(ax_raw.get_xlim())

        if floatContrast is True:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            # Make sure they have the same y-limits.
            ax_contrast.set_ylim(ax_raw.get_ylim())

            # Drawing in the x-axis for ax_raw.
            ## Set the tick labels!
            ax_raw.set_xticklabels(xlevs,
                                   rotation=tickAngle,
                                   horizontalalignment=tickAlignment)

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, statfunction(plotPoints[xlevs[0]]),
                        ax_contrast, 0)

            # Add label to floating axes. But on ax_raw!
            ax_raw.text(x=deltaSwarmX,
                        y=ax_raw.get_yaxis().get_view_interval()[0],
                        horizontalalignment='left',
                        s='Difference',
                        fontsize=15)

            # Set reference lines
            ## zero line
            ax_contrast.hlines(
                0,  # y-coordinate
                ax_contrast.xaxis.get_majorticklocs()
                [0],  # x-coordinates, start and end.
                ax_raw.xaxis.get_view_interval()[1],
                linestyle='solid',
                linewidth=0.75,
                color='black')

            ## effect size line
            ax_contrast.hlines(summDelta,
                               ax_contrast.xaxis.get_majorticklocs()[1],
                               ax_raw.xaxis.get_view_interval()[1],
                               linestyle='solid',
                               linewidth=0.75,
                               color='black')

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, before_stat_summary, ax_contrast, 0.)
        else:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            fig.add_subplot(ax_raw)
            fig.add_subplot(ax_contrast)
        ax_contrast.set_ylim(contrastYlim)
        # Calculate p-values.
        # 1-sample t-test to see if the mean of the difference is different from 0.
        ttestresult = ttest_1samp(plotPoints['delta_y'], popmean=0)[1]
        bootsDelta['ttest_pval'] = ttestresult
        contrastList.append(bootsDelta)
        contrastListNames.append(str(xlevs[1]) + ' v.s. ' + str(xlevs[0]))

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Now we iterate thru the contrast axes to normalize all the ylims.
    # Axes alternate raw, contrast, raw, contrast, ... in creation order.
    all_axes = fig.get_axes()
    for j, i in enumerate(range(1, len(all_axes), 2)):
        axx = all_axes[i]
        ## Get max and min of the dataset (j-th contrast, by position).
        contrast = contrastList.iloc[:, j]
        lower = np.min(contrast['stat_array'])
        upper = np.max(contrast['stat_array'])
        meandiff = contrast['summary']

        ## Make sure we have zero in the limits.
        if lower > 0:
            lower = 0.
        if upper < 0:
            upper = 0.

        ## Get tick distance on raw axes.
        ## This will be the tick distance for the contrast axes.
        rawAxesTicks = all_axes[i - 1].yaxis.get_majorticklocs()
        rawAxesTickDist = rawAxesTicks[1] - rawAxesTicks[0]

        ## First re-draw of axis with new tick interval
        axx.yaxis.set_major_locator(MultipleLocator(rawAxesTickDist))
        newticks1 = axx.get_yticks()

        if floatContrast is False:
            if (showAllYAxes is False and i >= 2):
                # Only the first contrast axes keeps its y-axis.
                axx.get_yaxis().set_visible(showAllYAxes)
            else:
                ## Obtain major ticks that comfortably encompass lower and upper.
                newticks2 = _pairedcontrast_ticks(newticks1, lower, upper,
                                                  meandiff)
                axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

                ## Draw zero reference line.
                axx.hlines(
                    y=0,
                    xmin=axx.get_xaxis().get_view_interval()[0],
                    xmax=axx.get_xaxis().get_view_interval()[1],
                    linestyle=contrastZeroLineStyle,
                    linewidth=0.75,
                    color=contrastZeroLineColor)

                sns.despine(ax=axx,
                            trim=True,
                            bottom=False,
                            right=True,
                            left=False,
                            top=True)

                ## Draw back the lines for the relevant y-axes.
                drawback_y(axx)

                ## Draw back the lines for the relevant x-axes.
                drawback_x(axx)

        elif floatContrast is True:
            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = _pairedcontrast_ticks(newticks1, lower, upper,
                                              meandiff)

            ## Re-draw the axis.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine and trim the axes.
            sns.despine(ax=axx,
                        trim=True,
                        bottom=False,
                        right=False,
                        left=True,
                        top=True)

    for raw_ax in fig.get_axes()[0::2]:
        # Loop through the raw data swarmplots and despine them appropriately.
        if floatContrast is True:
            sns.despine(ax=raw_ax, trim=True, right=True)

        else:
            sns.despine(ax=raw_ax,
                        trim=True,
                        bottom=True,
                        right=True)
            raw_ax.get_xaxis().set_visible(False)

        # Draw back the lines for the relevant y-axes.
        ymin = raw_ax.get_yaxis().get_majorticklocs()[0]
        ymax = raw_ax.get_yaxis().get_majorticklocs()[-1]
        x_left, _ = raw_ax.get_xaxis().get_view_interval()
        raw_ax.add_artist(
            Line2D((x_left, x_left), (ymin, ymax),
                   color='black',
                   linewidth=1.5))

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList
Ejemplo n.º 5
0
def contrastplot_test(data,
                      x,
                      y,
                      idx=None,
                      alpha=0.75,
                      axis_title_size=None,
                      barWidth=5,
                      contrastShareY=True,
                      contrastEffectSizeLineStyle='solid',
                      contrastEffectSizeLineColor='black',
                      contrastYlim=None,
                      contrastZeroLineStyle='solid',
                      contrastZeroLineColor='black',
                      effectSizeYLabel="Effect Size",
                      figsize=None,
                      floatContrast=True,
                      floatSwarmSpacer=0.2,
                      heightRatio=(1, 1),
                      idcol=None,
                      lineWidth=2,
                      legend=True,
                      legendFontSize=14,
                      legendFontProps={},
                      paired=False,
                      pal=None,
                      rawMarkerSize=8,
                      rawMarkerType='o',
                      reps=3000,
                      showGroupCount=True,
                      show95CI=False,
                      showAllYAxes=False,
                      showRawData=True,
                      smoothboot=False,
                      statfunction=None,
                      summaryBar=False,
                      summaryBarColor='grey',
                      summaryBarAlpha=0.25,
                      summaryColour='black',
                      summaryLine=True,
                      summaryLineStyle='solid',
                      summaryLineWidth=0.25,
                      summaryMarkerSize=10,
                      summaryMarkerType='o',
                      swarmShareY=True,
                      swarmYlim=None,
                      tickAngle=45,
                      tickAlignment='right',
                      violinOffset=0.375,
                      violinWidth=0.2,
                      violinColor='k',
                      xticksize=None,
                      yticksize=None,
                      **kwargs):
    '''Takes a pandas dataframe and produces a contrast plot:
    either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot.
    -----------------------------------------------------------------------
    Description of flags upcoming.'''

    # Check that `data` is a pandas dataframe
    if 'DataFrame' not in str(type(data)):
        raise TypeError(
            "The object passed to the command is not not a pandas DataFrame.\
         Please convert it to a pandas DataFrame.")

    # Get and set levels of data[x]
    if idx is None:
        widthratio = [1]
        allgrps = np.sort(data[x].unique())
        if paired:
            # If `idx` is not specified, just take the FIRST TWO levels alphabetically.
            tuple_in = tuple(allgrps[0:2], )
        else:
            # No idx is given, so all groups are compared to the first one in the DataFrame column.
            tuple_in = (tuple(allgrps), )
            if len(allgrps) > 2:
                floatContrast = False

    else:
        if all(isinstance(element, str) for element in idx):
            # if idx is supplied but not a multiplot (ie single list or tuple)
            tuple_in = (idx, )
            widthratio = [1]
            if len(idx) > 2:
                floatContrast = False
        elif all(isinstance(element, tuple) for element in idx):
            # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
            tuple_in = idx
            if (any(len(element) > 2 for element in tuple_in)):
                # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False.
                floatContrast = False
            # Make sure the widthratio of the seperate multiplot corresponds to how
            # many groups there are in each one.
            widthratio = []
            for i in tuple_in:
                widthratio.append(len(i))
        else:
            raise TypeError(
                "The object passed to `idx` consists of a mixture of single strings and tuples. \
                Please make sure that `idx` is either a tuple of column names, or a tuple of tuples for plotting."
            )

    # initialise statfunction
    if statfunction == None:
        statfunction = np.mean

    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()
    # # Calculate the bootstraps according to idx.
    # for ix, current_tuple in enumerate(tuple_in):
    #     bscontrast=list()
    #     for i in range (1, len(current_tuple)):
    #     # Note that you start from one. No need to do auto-contrast!
    #         tempbs=bootstrap_contrast(
    #             data=data,
    #             x=x,
    #             y=y,
    #             idx=[current_tuple[0], current_tuple[i]],
    #             statfunction=statfunction,
    #             smoothboot=smoothboot,
    #             reps=reps)
    #         bscontrast.append(tempbs)
    #         contrastList.append(tempbs)
    #         contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0])

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol = kwargs['hue']
            colGrps = data[colorCol].unique()
            nColors = len(colGrps)
        else:
            colorCol = x
            colGrps = data[x].unique()
            nColors = len([element for tupl in tuple_in for element in tupl])
        plotPal = dict(zip(colGrps, sns.color_palette(n_colors=nColors)))
    else:
        plotPal = pal

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar = True
        summaryLine = False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine = False

    if swarmYlim is None:
        # get range of _selected groups_.
        u = list()
        for t in idx:
            for i in np.unique(t):
                u.append(i)
        u = np.unique(u)
        tempdat = data[data[x].isin(u)]
        swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])])
    else:
        swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]])

    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])

    barWidth = barWidth / 1000  # Not sure why have to reduce the barwidth by this much!
    if showRawData is True:
        maxSwarmSpan = 0.25
    else:
        maxSwarmSpan = barWidth

    # Expand the ylim in both directions.
    ## Find half of the range of swarm_ylim.
    swarmrange = swarm_ylim[1] - swarm_ylim[0]
    pad = 0.1 * swarmrange
    x2 = np.array([swarm_ylim[0] - pad, swarm_ylim[1] + pad])
    swarm_ylim = x2

    # plot params
    if axis_title_size is None:
        axis_title_size = 25
    if yticksize is None:
        yticksize = 18
    if xticksize is None:
        xticksize = 18

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams = {'labelsize': axis_title_size}
    xtickParams = {'labelsize': xticksize}
    ytickParams = {'labelsize': yticksize}
    svgParams = {'fonttype': 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams)

    if figsize is None:
        if len(tuple_in) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (8, (8 / np.sqrt(2)))

    # Initialise figure, taking into account desired figsize.
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain = gridspec.GridSpec(
        1,
        np.shape(tuple_in)[0],
        # 1 row; columns based on number of tuples in tuple.
        width_ratios=widthratio,
        wspace=0)

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        plotdat = data[data[x].isin(current_tuple)]
        plotdat[x] = plotdat[x].astype("category")
        plotdat[x].cat.set_categories(current_tuple,
                                      ordered=True,
                                      inplace=True)
        plotdat.sort_values(by=[x])
        # Drop all nans.
        plotdat = plotdat.dropna()

        # Calculate summaries.
        summaries = plotdat.groupby([x], sort=True)[y].apply(statfunction)

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            # Create subGridSpec with 2 rows and 1 column.
            subGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx], wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig,
                                      subGridSpec[1, 0],
                                      sharex=ax_raw,
                                      frame_on=False)
        # Calculate the boostrapped contrast
        bscontrast = list()
        for i in range(1, len(current_tuple)):
            # Note that you start from one. No need to do auto-contrast!
            tempbs = bootstrap_contrast(
                data=data,
                x=x,
                y=y,
                idx=[current_tuple[0], current_tuple[i]],
                statfunction=statfunction,
                smoothboot=smoothboot,
                reps=reps)
            bscontrast.append(tempbs)
            contrastList.append(tempbs)
            contrastListNames.append(current_tuple[i] + ' vs. ' +
                                     current_tuple[0])

        #### PLOT RAW DATA.
        if showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            ax_raw.set_ylim(swarm_ylim)
            sw = sns.swarmplot(data=plotdat,
                               x=x,
                               y=y,
                               order=current_tuple,
                               ax=ax_raw,
                               alpha=alpha,
                               palette=plotPal,
                               size=rawMarkerSize,
                               marker=rawMarkerType,
                               **kwargs)

        if summaryBar is True:
            bar_raw = sns.barplot(x=summaries.index.tolist(),
                                  y=summaries.values,
                                  facecolor=summaryBarColor,
                                  ax=ax_raw,
                                  alpha=summaryBarAlpha)

        if floatContrast:
            # Get horizontal offset values.
            maxXBefore = max(sw.collections[0].get_offsets().T[0])
            minXAfter = min(sw.collections[1].get_offsets().T[0])
            xposAfter = maxXBefore + floatSwarmSpacer
            xAfterShift = minXAfter - xposAfter
            # shift the swarmplots
            offsetSwarmX(sw.collections[1], -xAfterShift)

            ## get swarm with largest span, set as max width of each barplot.
            for i, bar in enumerate(bar_raw.patches):
                x_width = bar.get_x()
                width = bar.get_width()
                centre = x_width + (width / 2.)
                if i == 0:
                    bar.set_x(centre - maxSwarmSpan / 2.)
                else:
                    bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                bar.set_width(maxSwarmSpan)

            ## Set the ticks locations for ax_raw.
            ax_raw.xaxis.set_ticks((0, xposAfter))
            firstTick = ax_raw.xaxis.get_ticklabels()[0].get_text()
            secondTick = ax_raw.xaxis.get_ticklabels()[1].get_text()
            ax_raw.set_xticklabels(
                [
                    firstTick,  #+' n='+count[firstTick],
                    secondTick
                ],  #+' n='+count[secondTick]],
                rotation=tickAngle,
                horizontalalignment=tickAlignment)

        if summaryLine is True:
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i - summaryLineWidth,
                     i + summaryLineWidth),  # x-coordinates
                    (m, m),
                    color=summaryColour,
                    linestyle=summaryLineStyle)

        if show95CI is True:
            sns.barplot(data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple) == 2:
            # Plot the CIs on the contrast axes.
            plotbootstrap(sw.collections[1],
                          bslist=tempbs,
                          ax=ax_contrast,
                          violinWidth=violinWidth,
                          violinOffset=violinOffset,
                          markersize=summaryMarkerSize,
                          marker=summaryMarkerType,
                          offset=floatContrast,
                          color=violinColor,
                          linewidth=1)
            if floatContrast:
                # Set reference lines
                ## First get leftmost limit of left reference group
                xtemp, _ = np.array(sw.collections[0].get_offsets()).T
                leftxlim = xtemp.min()
                ## Then get leftmost limit of right test group
                xtemp, _ = np.array(sw.collections[1].get_offsets()).T
                rightxlim = xtemp.min()

                ## zero line
                ax_contrast.hlines(
                    0,  # y-coordinates
                    leftxlim,
                    3.5,  # x-coordinates, start and end.
                    linestyle=contrastZeroLineStyle,
                    linewidth=0.75,
                    color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(
                    tempbs['summary'],
                    rightxlim,
                    3.5,  # x-coordinates, start and end.
                    linestyle=contrastEffectSizeLineStyle,
                    linewidth=0.75,
                    color=contrastEffectSizeLineColor)

                ## If the effect size is positive, shift the right axis up.
                if float(tempbs['summary']) > 0:
                    rightmin = ax_raw.get_ylim()[0] - float(tempbs['summary'])
                    rightmax = ax_raw.get_ylim()[1] - float(tempbs['summary'])
                ## If the effect size is negative, shift the right axis down.
                elif float(tempbs['summary']) < 0:
                    rightmin = ax_raw.get_ylim()[0] + float(tempbs['summary'])
                    rightmax = ax_raw.get_ylim()[1] + float(tempbs['summary'])

                ax_contrast.set_ylim(rightmin, rightmax)

                if gsIdx > 0:
                    ax_contrast.set_ylabel('')

                align_yaxis(ax_raw, tempbs['statistic_ref'], ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)

                # Set xlims so everything is properly visible!
                swarm_xbounds = ax_raw.get_xbound()
                ax_contrast.set_xbound(
                    swarm_xbounds[0] - (summaryLineWidth * 1.1),
                    swarm_xbounds[1] + (summaryLineWidth * 1.1))

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(bslist=bscontrast,
                                   ax=ax_contrast,
                                   violinWidth=violinWidth,
                                   violinOffset=violinOffset,
                                   markersize=summaryMarkerSize,
                                   marker=summaryMarkerType,
                                   linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx > 0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    ########
    axesCount = len(fig.get_axes())

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx = fig.axes[i]

        if i != axesCount - 2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(showAllYAxes)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(
                Line2D((axx.xaxis.get_view_interval()[0],
                        axx.xaxis.get_view_interval()[1]), (0, 0),
                       color='black',
                       linewidth=0.75))

        # I don't know why the swarm axes controls the contrast axes ticks....
        if showGroupCount:
            count = data.groupby(x).count()[y]
            newticks = list()
            for ix, t in enumerate(axx.xaxis.get_ticklabels()):
                t_text = t.get_text()
                nt = t_text + ' n=' + str(count[t_text])
                newticks.append(nt)
            axx.xaxis.set_ticklabels(newticks)

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i == axesCount - 2:  # the last (rightmost) swarm axes.
                axx.legend(loc='top right',
                           bbox_to_anchor=(1.1, 1.0),
                           fontsize=legendFontSize,
                           **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j, i in enumerate(range(1, axesCount, 2)):
        axx = fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright = axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                       xmin=xleft - 1,
                       xmax=xright + 1,
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)
            # # Draw back x-axis lines connecting ticks.
            # drawback_x(axx)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes.
                    # Not entirely sure why I have to do this.
                    drawback_y(axx)

            sns.despine(ax=axx,
                        top=True,
                        right=True,
                        left=False,
                        bottom=False,
                        trim=True)

            # Rotate tick labels.
            rotateTicks(axx, tickAngle, tickAlignment)

        else:
            # Re-draw the floating axis to the correct limits.
            lower = np.min(contrastList.ix['diffarray', j])
            upper = np.max(contrastList.ix['diffarray', j])
            meandiff = contrastList.ix['summary', j]

            ## Make sure we have zero in the limits.
            if lower > 0:
                lower = 0.
            if upper < 0:
                upper = 0.

            ## Get the tick interval from the left y-axis.
            leftticks = fig.get_axes()[i - 1].get_yticks()
            tickstep = leftticks[1] - leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1 = axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = list()
            for a, b in enumerate(newticks1):
                if (b >= lower and b <= upper):
                    # if the tick lies within upper and lower, take it.
                    newticks2.append(b)
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2) < meandiff:
                ind = np.where(newticks1 == np.max(newticks2))[0][
                    0]  # find out the max tick index in newticks1.
                newticks2.append(newticks1[ind + 1])
            elif meandiff < np.min(newticks2):
                ind = np.where(newticks1 == np.min(newticks2))[0][
                    0]  # find out the min tick index in newticks1.
                newticks2.append(newticks1[ind - 1])
            newticks2 = np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine the axes.
            sns.despine(ax=axx,
                        trim=True,
                        bottom=False,
                        right=False,
                        left=True,
                        top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots.
    if (axesCount > 2 and contrastShareY is True and floatContrast is False):

        # Set contrast ylim as max ticks of leftmost swarm axes.
        if contrastYlim is None:
            lower = list()
            upper = list()
            for c in range(0, len(contrastList.columns)):
                lower.append(np.min(contrastList.ix['bca_ci_low', c]))
                upper.append(np.max(contrastList.ix['bca_ci_high', c]))
            lower = np.min(lower)
            upper = np.max(upper)
        else:
            lower = contrastYlim[0]
            upper = contrastYlim[1]

        normalizeContrastY(fig,
                           contrast_ylim=contrastYlim,
                           show_all_yaxes=showAllYAxes)

    # if (axesCount==2 and
    #     floatContrast is False):
    #     drawback_x(fig.get_axes()[1])
    #     drawback_y(fig.get_axes()[1])

    # if swarmShareY is False:
    #     for i in range(0, axesCount, 2):
    #         drawback_y(fig.get_axes()[i])

    # if contrastShareY is False:
    #     for i in range(1, axesCount, 2):
    #         if floatContrast is True:
    #             sns.despine(ax=fig.get_axes()[i],
    #                        top=True, right=False, left=True, bottom=True,
    #                        trim=True)
    #         else:
    #             sns.despine(ax=fig.get_axes()[i], trim=True)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)

    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're all done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList
def pairedcontrast(data, x, y, idcol, reps = 3000,
statfunction = None, idx = None, figsize = None,
beforeAfterSpacer = 0.01, 
violinWidth = 0.005, 
floatOffset = 0.05, 
showRawData = False,
showAllYAxes = False,
floatContrast = True,
smoothboot = False,
floatViolinOffset = None, 
showConnections = True,
summaryBar = False,
contrastYlim = None,
swarmYlim = None,
barWidth = 0.005,
rawMarkerSize = 8,
rawMarkerType = 'o',
summaryMarkerSize = 10,
summaryMarkerType = 'o',
summaryBarColor = 'grey',
meansSummaryLineStyle = 'solid', 
contrastZeroLineStyle = 'solid', contrastEffectSizeLineStyle = 'solid',
contrastZeroLineColor = 'black', contrastEffectSizeLineColor = 'black',
pal = None,
legendLoc = 2, legendFontSize = 12, legendMarkerScale = 1,
axis_title_size = None,
yticksize = None,
xticksize = None,
tickAngle=45,
tickAlignment='right',
**kwargs):

    # Preliminaries.
    data = data.dropna()

    # plot params
    if axis_title_size is None:
        axis_title_size = 15
    if yticksize is None:
        yticksize = 12
    if xticksize is None:
        xticksize = 12

    axisTitleParams = {'labelsize' : axis_title_size}
    xtickParams = {'labelsize' : xticksize}
    ytickParams = {'labelsize' : yticksize}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)

    ## If `idx` is not specified, just take the FIRST TWO levels alphabetically.
    if idx is None:
        idx = tuple(np.unique(data[x])[0:2],)
    else:
        # check if multi-plot or not
        if all(isinstance(element, str) for element in idx):
            # if idx is supplied but not a multiplot (ie single list or tuple)
            if len(idx) != 2:
                print(idx, "does not have length 2.")
                sys.exit(0)
            else:
                idx = (tuple(idx, ),)
        elif all(isinstance(element, tuple) for element in idx):
            # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
            if ( any(len(element) != 2 for element in idx) ):
                # If any of the tuples contain more than 2 elements.
                print(element, "does not have length 2.")
                sys.exit(0)
    if floatViolinOffset is None:
        floatViolinOffset = beforeAfterSpacer/2
    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0],contrastYlim[1]])
    if swarmYlim is not None:
        swarmYlim = np.array([swarmYlim[0],swarmYlim[1]])

    ## Here we define the palette on all the levels of the 'x' column.
    ## Thus, if the same pandas dataframe is re-used across different plots,
    ## the color identity of each group will be maintained.
    ## Set palette based on total number of categories in data['x'] or data['hue_column']
    if 'hue' in kwargs:
        u = kwargs['hue']
    else:
        u = x
    if ('color' not in kwargs and 'hue' not in kwargs):
        kwargs['color'] = 'k'

    if pal is None:
        pal = dict( zip( data[u].unique(), sns.color_palette(n_colors = len(data[u].unique())) ) 
                      )
    else:
        pal = pal

    # Initialise figure.
    if figsize is None:
        if len(idx) > 2:
            figsize = (12,(12/np.sqrt(2)))
        else:
            figsize = (6,6)
    fig = plt.figure(figsize = figsize)

    # Initialise GridSpec based on `levs_tuple` shape.
    gsMain = gridspec.GridSpec( 1, np.shape(idx)[0]) # 1 row; columns based on number of tuples in tuple.
    # Set default statfunction
    if statfunction is None:
        statfunction = np.mean
    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()

    for gsIdx, xlevs in enumerate(idx):
        ## Pivot tempdat to get before and after lines.
        data_pivot = data.pivot_table(index = idcol, columns = x, values = y)

        # Start plotting!!
        if floatContrast is True:
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on = False)
            ax_contrast = ax_raw.twinx()
        else:
            gsSubGridSpec = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec = gsMain[gsIdx])
            ax_raw = plt.Subplot(fig, gsSubGridSpec[0, 0], frame_on = False)
            ax_contrast = plt.Subplot(fig, gsSubGridSpec[1, 0], sharex = ax_raw, frame_on = False)

        ## Plot raw data as swarmplot or stripplot.
        if showRawData is True:
            swarm_raw = sns.swarmplot(data = data, 
                                     x = x, y = y, 
                                     order = xlevs,
                                     ax = ax_raw,
                                     palette = pal,
                                     size = rawMarkerSize,
                                     marker = rawMarkerType,
                                     **kwargs)
        else:
            swarm_raw = sns.stripplot(data = data, 
                                     x = x, y = y, 
                                     order = xlevs,
                                     ax = ax_raw,
                                     palette = pal,
                                     **kwargs)
        swarm_raw.set_ylim(swarmYlim)
           
        ## Get some details about the raw data.
        maxXBefore = max(swarm_raw.collections[0].get_offsets().T[0])
        minXAfter = min(swarm_raw.collections[1].get_offsets().T[0])
        if showRawData is True:
            #beforeAfterSpacer = (getSwarmSpan(swarm_raw, 0) + getSwarmSpan(swarm_raw, 1))/2
            beforeAfterSpacer = 1
        xposAfter = maxXBefore + beforeAfterSpacer
        xAfterShift = minXAfter - xposAfter

        ## shift the after swarmpoints closer for aesthetic purposes.
        offsetSwarmX(swarm_raw.collections[1], -xAfterShift)

        ## pandas DataFrame of 'before' group
        x1 = pd.DataFrame({str(xlevs[0] + '_x') : pd.Series(swarm_raw.collections[0].get_offsets().T[0]),
                       xlevs[0] : pd.Series(swarm_raw.collections[0].get_offsets().T[1]),
                       '_R_' : pd.Series(swarm_raw.collections[0].get_facecolors().T[0]),
                       '_G_' : pd.Series(swarm_raw.collections[0].get_facecolors().T[1]),
                       '_B_' : pd.Series(swarm_raw.collections[0].get_facecolors().T[2]),
                      })
        ## join the RGB columns into a tuple, then assign to a column.
        x1['_hue_'] = x1[['_R_', '_G_', '_B_']].apply(tuple, axis=1) 
        x1 = x1.sort_values(by = xlevs[0])
        x1.index = data_pivot.sort_values(by = xlevs[0]).index

        ## pandas DataFrame of 'after' group
        ### create convenient signifiers for column names.
        befX = str(xlevs[0] + '_x')
        aftX = str(xlevs[1] + '_x')

        x2 = pd.DataFrame( {aftX : pd.Series(swarm_raw.collections[1].get_offsets().T[0]),
            xlevs[1] : pd.Series(swarm_raw.collections[1].get_offsets().T[1])} )
        x2 = x2.sort_values(by = xlevs[1])
        x2.index = data_pivot.sort_values(by = xlevs[1]).index

        ## Join x1 and x2, on both their indexes.
        plotPoints = x1.merge(x2, left_index = True, right_index = True, how='outer')

        ## Add the hue column if hue argument was passed.
        if 'hue' in kwargs:
            h = kwargs['hue']
            plotPoints[h] = data.pivot(index = idcol, columns = x, values = h)[xlevs[0]]
            swarm_raw.legend(loc = legendLoc, 
                fontsize = legendFontSize, 
                markerscale = legendMarkerScale)

        ## Plot the lines to join the 'before' points to their respective 'after' points.
        if showConnections is True:
            for i in plotPoints.index:
                ax_raw.plot([ plotPoints.ix[i, befX],
                    plotPoints.ix[i, aftX] ],
                    [ plotPoints.ix[i, xlevs[0]], 
                    plotPoints.ix[i, xlevs[1]] ],
                    linestyle = 'solid',
                    color = plotPoints.ix[i, '_hue_'],
                    linewidth = 0.75,
                    alpha = 0.75
                    )

        ## Hide the raw swarmplot data if so desired.
        if showRawData is False:
            swarm_raw.collections[0].set_visible(False)
            swarm_raw.collections[1].set_visible(False)

        if showRawData is True:
            #maxSwarmSpan = max(np.array([getSwarmSpan(swarm_raw, 0), getSwarmSpan(swarm_raw, 1)]))/2
            maxSwarmSpan = 0.5
        else:
            maxSwarmSpan = barWidth            

        ## Plot Summary Bar.
        if summaryBar is True:
            # Calculate means
            means = data.groupby([x], sort = True).mean()[y]
            # # Calculate medians
            # medians = data.groupby([x], sort = True).median()[y]

            ## Draw summary bar.
            bar_raw = sns.barplot(x = means.index, 
                        y = means.values, 
                        order = xlevs,
                        ax = ax_raw,
                        ci = 0,
                        facecolor = summaryBarColor, 
                        alpha = 0.25)
            ## Draw zero reference line.
            ax_raw.add_artist(Line2D(
                (ax_raw.xaxis.get_view_interval()[0], 
                    ax_raw.xaxis.get_view_interval()[1]), 
                (0,0),
                color='black', linewidth=0.75
                )
            )       

            ## get swarm with largest span, set as max width of each barplot.
            for i, bar in enumerate(bar_raw.patches):
                x_width = bar.get_x()
                width = bar.get_width()
                centre = x_width + width/2.
                if i == 0:
                    bar.set_x(centre - maxSwarmSpan/2.)
                else:
                    bar.set_x(centre - xAfterShift - maxSwarmSpan/2.)
                bar.set_width(maxSwarmSpan)

        # Get y-limits of the treatment swarm points.
        beforeRaw = pd.DataFrame( swarm_raw.collections[0].get_offsets() )
        afterRaw = pd.DataFrame( swarm_raw.collections[1].get_offsets() )
        before_leftx = min(beforeRaw[0])
        after_leftx = min(afterRaw[0])
        after_rightx = max(afterRaw[0])
        after_stat_summary = statfunction(beforeRaw[1])

        # Calculate the summary difference and CI.
        plotPoints['delta_y'] = plotPoints[xlevs[1]] - plotPoints[xlevs[0]]
        plotPoints['delta_x'] = [0] * np.shape(plotPoints)[0]

        tempseries = plotPoints['delta_y'].tolist()
        test = tempseries.count(tempseries[0]) != len(tempseries)

        bootsDelta = bootstrap(plotPoints['delta_y'],
            statfunction = statfunction, 
            smoothboot = smoothboot,
            reps = reps)
        summDelta = bootsDelta['summary']
        lowDelta = bootsDelta['bca_ci_low']
        highDelta = bootsDelta['bca_ci_high']

        # set new xpos for delta violin.
        if floatContrast is True:
            if showRawData is False:
                xposPlusViolin = deltaSwarmX = after_rightx + floatViolinOffset
            else:
                xposPlusViolin = deltaSwarmX = after_rightx + maxSwarmSpan
        else:
            xposPlusViolin = xposAfter
        if showRawData is True:
            # If showRawData is True and floatContrast is True, 
            # set violinwidth to the barwidth.
            violinWidth = maxSwarmSpan

        xmaxPlot = xposPlusViolin + violinWidth

        # Plot the summary measure.
        ax_contrast.plot(xposPlusViolin, summDelta,
            marker = 'o',
            markerfacecolor = 'k', 
            markersize = summaryMarkerSize,
            alpha = 0.75
            )

        # Plot the CI.
        ax_contrast.plot([xposPlusViolin, xposPlusViolin],
            [lowDelta, highDelta],
            color = 'k', 
            alpha = 0.75,
            linestyle = 'solid'
            )

        # Plot the violin-plot.
        v = ax_contrast.violinplot(bootsDelta['stat_array'], [xposPlusViolin], 
                                 widths = violinWidth, 
                                 showextrema = False, 
                                 showmeans = False)
        halfviolin(v, half = 'right', color = 'k')

        # Remove left axes x-axis title.
        ax_raw.set_xlabel("")
        # Remove floating axes y-axis title.
        ax_contrast.set_ylabel("")

        # Set proper x-limits
        ax_raw.set_xlim(before_leftx - beforeAfterSpacer/2, xmaxPlot)
        ax_raw.get_xaxis().set_view_interval(before_leftx - beforeAfterSpacer/2, 
            after_rightx + beforeAfterSpacer/2)
        ax_contrast.set_xlim(ax_raw.get_xlim())

        if floatContrast is True:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            # Make sure they have the same y-limits.
            ax_contrast.set_ylim(ax_raw.get_ylim())
            
            # Drawing in the x-axis for ax_raw.
            ## Set the tick labels!
            ax_raw.set_xticklabels(xlevs, rotation = tickAngle, horizontalalignment = tickAlignment)
            ## Get lowest y-value for ax_raw.
            y = ax_raw.get_yaxis().get_view_interval()[0] 

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, statfunction(plotPoints[xlevs[0]]),
                           ax_contrast, 0)

            # Add label to floating axes. But on ax_raw!
            ax_raw.text(x = deltaSwarmX,
                          y = ax_raw.get_yaxis().get_view_interval()[0],
                          horizontalalignment = 'left',
                          s = 'Difference',
                          fontsize = 15)        

            # Set reference lines
            ## zero line
            ax_contrast.hlines(0,                                           # y-coordinate
                            ax_contrast.xaxis.get_majorticklocs()[0],       # x-coordinates, start and end.
                            ax_raw.xaxis.get_view_interval()[1],   
                            linestyle = 'solid',
                            linewidth = 0.75,
                            color = 'black')

            ## effect size line
            ax_contrast.hlines(summDelta, 
                            ax_contrast.xaxis.get_majorticklocs()[1],
                            ax_raw.xaxis.get_view_interval()[1],
                            linestyle = 'solid',
                            linewidth = 0.75,
                            color = 'black')

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, after_stat_summary, ax_contrast, 0.)
        else:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))
            
            fig.add_subplot(ax_raw)
            fig.add_subplot(ax_contrast)
        ax_contrast.set_ylim(contrastYlim)
        # Calculate p-values.
        # 1-sample t-test to see if the mean of the difference is different from 0.
        ttestresult = ttest_1samp(plotPoints['delta_y'], popmean = 0)[1]
        bootsDelta['ttest_pval'] = ttestresult
        contrastList.append(bootsDelta)
        contrastListNames.append( str(xlevs[1])+' v.s. '+str(xlevs[0]) )

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Now we iterate thru the contrast axes to normalize all the ylims.
    for j,i in enumerate(range(1, len(fig.get_axes()), 2)):
        axx=fig.get_axes()[i]
        ## Get max and min of the dataset.
        lower = np.min(contrastList.ix['stat_array',j])
        upper = np.max(contrastList.ix['stat_array',j])
        meandiff = contrastList.ix['summary', j]

        ## Make sure we have zero in the limits.
        if lower > 0:
            lower = 0.
        if upper < 0:
            upper = 0.

        ## Get tick distance on raw axes.
        ## This will be the tick distance for the contrast axes.
        rawAxesTicks = fig.get_axes()[i-1].yaxis.get_majorticklocs()
        rawAxesTickDist = rawAxesTicks[1] - rawAxesTicks[0]

        ## First re-draw of axis with new tick interval
        axx.yaxis.set_major_locator(MultipleLocator(rawAxesTickDist))
        newticks1 = fig.get_axes()[i].get_yticks()

        if floatContrast is False:
            if (showAllYAxes is False and i in range( 2, len(fig.get_axes())) ):
                axx.get_yaxis().set_visible(showAllYAxes)
            else:
                ## Obtain major ticks that comfortably encompass lower and upper.
                newticks2 = list()
                for a,b in enumerate(newticks1):
                    if (b >= lower and b <= upper):
                        # if the tick lies within upper and lower, take it.
                        newticks2.append(b)
                # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
                if np.max(newticks2) < meandiff:
                    ind = np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1.
                    newticks2.append( newticks1[ind+1] )
                elif meandiff < np.min(newticks2):
                    ind = np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1.
                    newticks2.append( newticks1[ind-1] )
                newticks2 = np.array(newticks2)
                newticks2.sort()
                axx.yaxis.set_major_locator(FixedLocator(locs = newticks2))

                ## Draw zero reference line.
                axx.hlines(y = 0,
                    xmin = fig.get_axes()[i].get_xaxis().get_view_interval()[0], 
                    xmax = fig.get_axes()[i].get_xaxis().get_view_interval()[1],
                    linestyle = contrastZeroLineStyle,
                    linewidth = 0.75,
                    color = contrastZeroLineColor)

                sns.despine(ax = fig.get_axes()[i], trim = True, 
                    bottom = False, right = True,
                    left = False, top = True)

                ## Draw back the lines for the relevant y-axes.
                drawback_y(axx)

                ## Draw back the lines for the relevant x-axes.
                drawback_x(axx)

        elif floatContrast is True:
            ## Get the original ticks on the floating y-axis.
            newticks1 = fig.get_axes()[i].get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = list()
            for a,b in enumerate(newticks1):
                if (b >= lower and b <= upper):
                    # if the tick lies within upper and lower, take it.
                    newticks2.append(b)
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2) < meandiff:
                ind = np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1.
                newticks2.append( newticks1[ind+1] )
            elif meandiff < np.min(newticks2):
                ind = np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1.
                newticks2.append( newticks1[ind-1] )
            newticks2 = np.array(newticks2)
            newticks2.sort()

            ## Re-draw the axis.
            axx.yaxis.set_major_locator(FixedLocator(locs = newticks2)) 

            ## Despine and trim the axes.
            sns.despine(ax = axx, trim = True, 
                bottom = False, right = False,
                left = True, top = True)

    for i in range(0, len(fig.get_axes()), 2):
        # Loop through the raw data swarmplots and despine them appropriately.
        if floatContrast is True:
            sns.despine(ax = fig.get_axes()[i], trim = True, right = True)

        else:
            sns.despine(ax = fig.get_axes()[i], trim = True, bottom = True, right = True)
            fig.get_axes()[i].get_xaxis().set_visible(False)

        # Draw back the lines for the relevant y-axes.
        ymin = fig.get_axes()[i].get_yaxis().get_majorticklocs()[0]
        ymax = fig.get_axes()[i].get_yaxis().get_majorticklocs()[-1]
        x, _ = fig.get_axes()[i].get_xaxis().get_view_interval()
        fig.get_axes()[i].add_artist(Line2D((x, x), (ymin, ymax), color='black', linewidth=1.5))    

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace = 0)
    else:    
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're done.
    rcdefaults() # restore matplotlib defaults.
    sns.set() # restore seaborn defaults.
    return fig, contrastList
Ejemplo n.º 7
0
def contrastplot(
    data, x=None, y=None, idx=None, idcol=None,

    alpha=0.75, 
    axis_title_size=None,

    ci=95,
    contrastShareY=True,
    contrastEffectSizeLineStyle='solid',
    contrastEffectSizeLineColor='black',

    contrastYlim=None,
    contrastZeroLineStyle='solid', 
    contrastZeroLineColor='black', 
    connectPairs=True,

    effectSizeYLabel="Effect Size", 

    figsize=None, 
    floatContrast=True,
    floatSwarmSpacer=0.2,

    heightRatio=(1, 1),

    lineWidth=2,
    legend=True,
    legendFontSize=14,
    legendFontProps={},

    paired=False,
    pairedDeltaLineAlpha=0.3,
    pairedDeltaLineWidth=1.2,
    pal=None, 

    rawMarkerSize=8,
    rawMarkerType='o',
    reps=3000,
    
    showGroupCount=True,
    showCI=False, 
    showAllYAxes=False,
    showRawData=True,
    smoothboot=False, 
    statfunction=None, 

    summaryBar=False, 
    summaryBarColor='grey',
    summaryBarAlpha=0.25,

    summaryColour='black', 
    summaryLine=True, 
    summaryLineStyle='solid', 
    summaryLineWidth=0.25, 

    summaryMarkerSize=10, 
    summaryMarkerType='o',

    swarmShareY=True, 
    swarmYlim=None, 

    tickAngle=45,
    tickAlignment='right',

    violinOffset=0.375,
    violinWidth=0.2, 
    violinColor='k',

    xticksize=None,
    yticksize=None,

    **kwargs):

    '''Takes a pandas DataFrame and produces a contrast plot:
    either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot.
    Paired and unpaired options available.

    Keyword arguments:
        data: pandas DataFrame
            
        x: string
            column name containing categories to be plotted on the x-axis.

        y: string
            column name containing values to be plotted on the y-axis.

        idx: tuple
            flexible declaration of groupwise comparisons.

        idcol: string
            for paired plots.

        alpha: float
            alpha (transparency) of raw swarmed data points.
            
        axis_title_size=None
        ci=95
        contrastShareY=True
        contrastEffectSizeLineStyle='solid'
        contrastEffectSizeLineColor='black'
        contrastYlim=None
        contrastZeroLineStyle='solid'
        contrastZeroLineColor='black'
        effectSizeYLabel="Effect Size"
        figsize=None
        floatContrast=True
        floatSwarmSpacer=0.2
        heightRatio=(1,1)
        lineWidth=2
        legend=True
        legendFontSize=14
        legendFontProps={}
        paired=False
        pairedDeltaLineAlpha=0.3
        pairedDeltaLineWidth=1.2
        pal=None
        rawMarkerSize=8
        rawMarkerType='o'
        reps=3000
        showGroupCount=True
        showCI=False
        showAllYAxes=False
        showRawData=True
        smoothboot=False
        statfunction=None
        summaryBar=False
        summaryBarColor='grey'
        summaryBarAlpha=0.25
        summaryColour='black'
        summaryLine=True
        summaryLineStyle='solid'
        summaryLineWidth=0.25
        summaryMarkerSize=10
        summaryMarkerType='o'
        swarmShareY=True
        swarmYlim=None
        tickAngle=45
        tickAlignment='right'
        violinOffset=0.375
        violinWidth=0.2
        violinColor='k'
        xticksize=None
        yticksize=None

    Returns:
        A matplotlib Figure.
        Organization of figure Axes.
    '''

    # Check that `data` is a pandas dataframe
    if 'DataFrame' not in str(type(data)):
        raise TypeError("The object passed to the command is not not a pandas DataFrame.\
         Please convert it to a pandas DataFrame.")

    # make sure that at least x, y, and idx are specified.
    if x is None and y is None and idx is None:
        raise ValueError('You need to specify `x` and `y`, or `idx`. Neither has been specifed.')

    if x is None:
        # if x is not specified, assume this is a 'wide' dataset, with each idx being the name of a column.
        datatype='wide'
        # Check that the idx are legit columns.
        all_idx=np.unique([element for tupl in idx for element in tupl])
        # # melt the data.
        # data=pd.melt(data,value_vars=all_idx)
        # x='variable'
        # y='value'
    else:
        # if x is specified, assume this is a 'long' dataset with each row corresponding to one datapoint.
        datatype='long'
        # make sure y is not none.
        if y is None:
            raise ValueError("`paired` is false, but no y-column given.")
        # Calculate Ns.
        counts=data.groupby(x)[y].count()

    # Get and set levels of data[x]
    if paired is True:
        violinWidth=0.1
        # # Calculate Ns--which should be simply the number of rows in data.
        # counts=len(data)
        # is idcol supplied?
        if idcol is None and datatype=='long':
            raise ValueError('`idcol` has not been supplied but a paired plot is desired; please specify the `idcol`.')
        if idx is not None:
            # check if multi-plot or not
            if all(isinstance(element, str) for element in idx):
                # check that every idx is a column name.
                idx_not_in_cols=[n
                for n in idx
                if n not in data[x].unique()]
                if len(idx_not_in_cols)!=0:
                    raise ValueError(str(idx_not_in_cols)+" cannot be found in the columns of `data`.")
                # data_wide_cols=[n for n in idx if n in data.columns]
                # if idx is supplied but not a multiplot (ie single list or tuple)
                if len(idx) != 2:
                    raise ValueError(idx+" does not have length 2.")
                else:
                    tuple_in=(tuple(idx, ),)
                widthratio=[1]
            elif all(isinstance(element, tuple) for element in idx):
                # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
                idx_not_in_cols=[n
                for tup in idx
                for n in tup
                if n not in data[x].unique()]
                if len(idx_not_in_cols)!=0:
                    raise ValueError(str(idx_not_in_cols)+" cannot be found in the column "+x)
                # data_wide_cols=[n for tup in idx for n in tup if n in data.columns]
                if ( any(len(element) != 2 for element in idx) ):
                    # If any of the tuples does not contain exactly 2 elements.
                    raise ValueError(element+" does not have length 2.")
                # Make sure the widthratio of the separate multiplot corresponds to how
                # many groups there are in each one.
                tuple_in=idx
                widthratio=[]
                for i in tuple_in:
                    widthratio.append(len(i))
        elif idx is None:
            raise ValueError('Please specify idx.')
        showRawData=False # Just show lines, do not show data.
        showCI=False # wait till I figure out how to plot this for sns.barplot.
        if datatype=='long':
            if idx is None:
                ## If `idx` is not specified, just take the FIRST TWO levels alphabetically.
                tuple_in=tuple(np.sort(np.unique(data[x]))[0:2],)
            # pivot the dataframe if it is long!
            data_pivot=data.pivot_table(index = idcol, columns = x, values = y)

    elif paired is False:
        if idx is None:
            widthratio=[1]
            tuple_in=( tuple(data[x].unique()) ,)
            if len(tuple_in[0])>2:
                floatContrast=False
        else:
            if all(isinstance(element, str) for element in idx):
                # if idx is supplied but not a multiplot (ie single list or tuple)
                # check that every idx specified can be found in data[x]
                idx_not_in_x=[n for n in idx 
                if n not in data[x].unique()]
                if len(idx_not_in_x)!=0:
                    raise ValueError(str(idx_not_in_x)+" cannot be found in the column "+x)
                tuple_in=(idx, )
                widthratio=[1]
                if len(idx)>2:
                    floatContrast=False
            elif all(isinstance(element, tuple) for element in idx):
                # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
                idx_not_in_x=[n
                for tup in idx
                for n in tup
                if n not in data[x].unique()]
                if len(idx_not_in_x)!=0:
                    raise ValueError(str(idx_not_in_x)+" cannot be found in the column "+x)
                tuple_in=idx

                if ( any(len(element)>2 for element in tuple_in) ):
                    # if any of the tuples in idx has more than 2 groups, we set floatContrast to False.
                    floatContrast=False
                # Make sure the widthratio of the separate multiplot corresponds to how
                # many groups there are in each one.
                widthratio=[]
                for i in tuple_in:
                    widthratio.append(len(i))
            else:
                raise TypeError("The object passed to `idx` consists of a mixture of single strings and tuples. \
                    Please make sure that `idx` is either a tuple of column names, or a tuple of tuples, for plotting.")

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar=True
        summaryLine=False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine=False
    # initialise statfunction
    if statfunction == None:
        statfunction=np.mean
    # Create list to collect all the contrast DataFrames generated.
    contrastList=list()
    contrastListNames=list()

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol=kwargs['hue']
            if colorCol not in data.columns:
                raise ValueError(colorCol+' is not a column name.')
            colGrps=data[colorCol].unique()#.tolist()
            plotPal=dict( zip( colGrps, sns.color_palette(n_colors=len(colGrps)) ) )
        else:
            if datatype=='long':
                colGrps=data[x].unique()#.tolist()
                plotPal=dict( zip( colGrps, sns.color_palette(n_colors=len(colGrps)) ) )
            if datatype=='wide':
                plotPal=np.repeat('k',len(data))
    else:
        if datatype=='long':
            plotPal=pal
        if datatype=='wide':
            plotPal=list(map(lambda x:pal[x], data[hue]))

    if swarmYlim is None:
        # get range of _selected groups_.
        # u = list()
        # for t in tuple_in:
        #     for i in np.unique(t):
        #         u.append(i)
        # u = np.unique(u)
        u=np.unique([element for tupl in tuple_in for element in tupl])
        if datatype=='long':
            tempdat=data[data[x].isin(u)]
            swarm_ylim=np.array([np.min(tempdat[y]), np.max(tempdat[y])])
        if datatype=='wide':
            allMin=list()
            allMax=list()
            for col in u:
                allMin.append(np.min(data[col]))
                allMax.append(np.max(data[col]))
            swarm_ylim=np.array( [np.min(allMin),np.max(allMax)] )
        swarm_ylim=np.round(swarm_ylim)
    else:
        swarm_ylim=np.array([swarmYlim[0],swarmYlim[1]])

    if summaryBar is True:
        lims=swarm_ylim
        # check that 0 lies within the desired limits.
        # if not, extend (upper or lower) limit to zero.
        if 0 not in range( int(round(lims[0])),int(round(lims[1])) ): # turn swarm_ylim to integer range.
            # check if all negative:.
            if lims[0]<0. and lims[1]<0.:
                swarm_ylim=np.array([np.min(lims),0.])
            # check if all positive.
            elif lims[0]>0. and lims[1]>0.:
                swarm_ylim=np.array([0.,np.max(lims)])

    if contrastYlim is not None:
        contrastYlim=np.array([contrastYlim[0],contrastYlim[1]])

    # plot params
    if axis_title_size is None:
        axis_title_size=27
    if yticksize is None:
        yticksize=22
    if xticksize is None:
        xticksize=22

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams={'labelsize' : axis_title_size}
    xtickParams={'labelsize' : xticksize}
    ytickParams={'labelsize' : yticksize}
    svgParams={'fonttype' : 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams) 

    if figsize is None:
        if len(tuple_in)>2:
            figsize=(12,(12/np.sqrt(2)))
        else:
            figsize=(8,(8/np.sqrt(2)))
    
    # calculate CI.
    if ci<0 or ci>100:
        raise ValueError('ci should be between 0 and 100.')
    alpha_level=(100.-ci)/100.

    # Initialise figure, taking into account desired figsize.
    fig=plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain=gridspec.GridSpec( 
        1, np.shape(tuple_in)[0], 
         # 1 row; columns based on number of tuples in tuple.
         width_ratios=widthratio,
         wspace=0 )

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        if datatype=='long':
            plotdat=data[data[x].isin(current_tuple)]
            plotdat[x]=plotdat[x].astype("category")
            plotdat[x].cat.set_categories(
                current_tuple,
                ordered=True,
                inplace=True)
            plotdat.sort_values(by=[x])
            # # Drop all nans. 
            # plotdat.dropna(inplace=True)
            summaries=plotdat.groupby(x)[y].apply(statfunction)
        if datatype=='wide':
            plotdat=data[list(current_tuple)]
            summaries=statfunction(plotdat)
            plotdat=pd.melt(plotdat) ##### NOW I HAVE MELTED THE WIDE DATA.
            
        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot.
            ax_raw=fig.add_subplot(gsMain[gsIdx],
                frame_on=False)
            ax_contrast=ax_raw.twinx()
        else:
        # Create subGridSpec with 2 rows and 1 column.
            subGridSpec=gridspec.GridSpecFromSubplotSpec(2, 1,
                subplot_spec=gsMain[gsIdx],
                wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw=plt.Subplot(fig,
                subGridSpec[0, 0],
                frame_on=False)
            ax_contrast=plt.Subplot(fig,
                subGridSpec[1, 0],
                sharex=ax_raw,
                frame_on=False)
        # Calculate the bootstrapped contrast
        bscontrast=list()
        if paired is False:
            tempplotdat=plotdat[[x,y]] # only select the columns used for x and y plotting.
            for i in range (1, len(current_tuple)):
                # Note that you start from one. No need to do auto-contrast!
                # if datatype=='long':aas
                    tempbs=bootstrap_contrast(
                        data=tempplotdat.dropna(), 
                        x=x,
                        y=y,
                        idx=[current_tuple[0], current_tuple[i]],
                        statfunction=statfunction,
                        smoothboot=smoothboot,
                        alpha_level=alpha_level,
                        reps=reps)
                    bscontrast.append(tempbs)
                    contrastList.append(tempbs)
                    contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0])

        #### PLOT RAW DATA.
        ax_raw.set_ylim(swarm_ylim)
        # ax_raw.yaxis.set_major_locator(MaxNLocator(n_bins='auto'))
        # ax_raw.yaxis.set_major_locator(LinearLocator())
        if paired is False and showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            sw=sns.swarmplot(
                data=plotdat, 
                x=x, y=y, 
                order=current_tuple, 
                ax=ax_raw, 
                alpha=alpha, 
                palette=plotPal,
                size=rawMarkerSize,
                marker=rawMarkerType,
                **kwargs)

            if floatContrast:
                # Get horizontal offset values.
                maxXBefore=max(sw.collections[0].get_offsets().T[0])
                minXAfter=min(sw.collections[1].get_offsets().T[0])
                xposAfter=maxXBefore+floatSwarmSpacer
                xAfterShift=minXAfter-xposAfter
                # shift the (second) swarmplot
                offsetSwarmX(sw.collections[1], -xAfterShift)
                # shift the tick.
                ax_raw.set_xticks([0.,1-xAfterShift])

        elif paired is True:
            if showRawData is True:
                sw=sns.swarmplot(data=plotdat, 
                    x=x, y=y, 
                    order=current_tuple, 
                    ax=ax_raw, 
                    alpha=alpha, 
                    palette=plotPal,
                    size=rawMarkerSize,
                    marker=rawMarkerType,
                **kwargs)
            if connectPairs is True:
                # Produce paired plot with lines.
                before=plotdat[plotdat[x]==current_tuple[0]][y].tolist()
                after=plotdat[plotdat[x]==current_tuple[1]][y].tolist()
                linedf=pd.DataFrame(
                    {'before':before,
                    'after':after}
                    )
                # to get color, need to loop thru each line and plot individually.
                for ii in range(0,len(linedf)):
                    ax_raw.plot( [0,0.25], [ linedf.loc[ii,'before'],
                                            linedf.loc[ii,'after'] ],
                                linestyle='solid',
                                linewidth=pairedDeltaLineWidth,
                                color=plotPal[current_tuple[0]],
                                alpha=pairedDeltaLineAlpha,
                               )
                ax_raw.set_xlim(-0.25,0.5)
                ax_raw.set_xticks([0,0.25])
                ax_raw.set_xticklabels([current_tuple[0],current_tuple[1]])

        # if swarmYlim is None:
        #     # if swarmYlim was not specified, tweak the y-axis 
        #     # to show all the data without losing ticks and range.
        #     ## Get all yticks.
        #     axxYTicks=ax_raw.yaxis.get_majorticklocs()
        #     ## Get ytick interval.
        #     YTickInterval=axxYTicks[1]-axxYTicks[0]
        #     ## Get current ylim
        #     currentYlim=ax_raw.get_ylim()
        #     ## Extend ylim by adding a fifth of the tick interval as spacing at both ends.
        #     ax_raw.set_ylim(
        #         currentYlim[0]-(YTickInterval/5),
        #         currentYlim[1]+(YTickInterval/5)
        #         )
        #     ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto'))
        # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto'))
        # ax_raw.yaxis.set_major_locator(LinearLocator())

        if summaryBar is True:
            if paired is False:
                bar_raw=sns.barplot(
                    x=summaries.index.tolist(),
                    y=summaries.values,
                    facecolor=summaryBarColor,
                    ax=ax_raw,
                    alpha=summaryBarAlpha)
                if floatContrast is True:
                    maxSwarmSpan=2/10.
                    xlocs=list()
                    for i, bar in enumerate(bar_raw.patches):
                        x_width=bar.get_x()
                        width=bar.get_width()
                        centre=x_width + (width/2.)
                        if i == 0:
                            bar.set_x(centre-maxSwarmSpan/2.)
                            xlocs.append(centre)
                        else:
                            bar.set_x(centre-xAfterShift-maxSwarmSpan/2.)
                            xlocs.append(centre-xAfterShift)
                        bar.set_width(maxSwarmSpan)
                    ax_raw.set_xticks(xlocs) # make sure xticklocs match the barplot.
                elif floatContrast is False:
                    maxSwarmSpan=4/10.
                    xpos=ax_raw.xaxis.get_majorticklocs()
                    for i, bar in enumerate(bar_raw.patches):
                        bar.set_x(xpos[i]-maxSwarmSpan/2.)
                        bar.set_width(maxSwarmSpan)
            else:
                # if paired is true
                ax_raw.bar([0,0.25], 
                    [ statfunction(plotdat[current_tuple[0]]),
                    statfunction(plotdat[current_tuple[1]]) ],
                    color=summaryBarColor,
                    alpha=0.5,
                    width=0.05)
                ## Draw zero reference line.
                ax_raw.add_artist(Line2D(
                    (ax_raw.xaxis.get_view_interval()[0],
                     ax_raw.xaxis.get_view_interval()[1]),
                    (0,0),
                    color='k', linewidth=1.25)
                                 )

        if summaryLine is True:
            if paired is True:
                xdelta=0
            else:
                xdelta=summaryLineWidth
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i-xdelta, 
                    i+xdelta), # x-coordinates
                    (m, m),
                    color=summaryColour, 
                    linestyle=summaryLineStyle)

        if showCI is True:
                sns.barplot(
                    data=plotdat, 
                    x=x, y=y, 
                    ax=ax_raw, 
                    alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple)==2:
            if paired is False:
                # Plot the CIs on the contrast axes.
                plotbootstrap(sw.collections[1],
                              bslist=tempbs,
                              ax=ax_contrast, 
                              violinWidth=violinWidth,
                              violinOffset=violinOffset,
                              markersize=summaryMarkerSize,
                              marker=summaryMarkerType,
                              offset=floatContrast,
                              color=violinColor,
                              linewidth=1)
            else:
                bootsDelta = bootstrap(
                    plotdat[current_tuple[1]]-plotdat[current_tuple[0]],
                    statfunction=statfunction,
                    smoothboot=smoothboot,
                    alpha_level=alpha_level,
                    reps=reps)
                contrastList.append(bootsDelta)
                contrastListNames.append(current_tuple[1]+' vs. '+current_tuple[0])
                summDelta = bootsDelta['summary']
                lowDelta = bootsDelta['bca_ci_low']
                highDelta = bootsDelta['bca_ci_high']

                if floatContrast:
                    xpos=0.375
                else:
                    xpos=0.25

                # Plot the summary measure.
                ax_contrast.plot(xpos, bootsDelta['summary'],
                         marker=summaryMarkerType,
                         markerfacecolor='k',
                         markersize=summaryMarkerSize,
                         alpha=0.75
                        )
                # Plot the CI.
                ax_contrast.plot([xpos, xpos],
                         [lowDelta, highDelta],
                         color='k',
                         alpha=0.75,
                         # linewidth=1,
                         linestyle='solid'
                        )
                
                # Plot the violin-plot.
                v = ax_contrast.violinplot(bootsDelta['stat_array'], [xpos], 
                                           widths = violinWidth, 
                                           showextrema = False, 
                                           showmeans = False)
                halfviolin(v, half = 'right', color = 'k')

            if floatContrast:
                # Set reference lines
                if paired is False:
                    ## First get leftmost limit of left reference group
                    xtemp, _=np.array(sw.collections[0].get_offsets()).T
                    leftxlim=xtemp.min()
                    ## Then get leftmost limit of right test group
                    xtemp, _=np.array(sw.collections[1].get_offsets()).T
                    rightxlim=xtemp.min()
                    ref=tempbs['summary']
                else:
                    leftxlim=0
                    rightxlim=0.25
                    ref=bootsDelta['summary']
                    ax_contrast.set_xlim(-0.25, 0.5) # does this work?

                ## zero line
                ax_contrast.hlines(0,                   # y-coordinates
                                leftxlim, 3.5,       # x-coordinates, start and end.
                                linestyle=contrastZeroLineStyle,
                                linewidth=1,
                                color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(ref, 
                                rightxlim, 3.5,        # x-coordinates, start and end.
                                linestyle=contrastEffectSizeLineStyle,
                                linewidth=1,
                                color=contrastEffectSizeLineColor)


                if paired is False:
                    es=float(tempbs['summary'])
                    refSum=tempbs['statistic_ref']
                else:
                    es=float(bootsDelta['summary'])
                    refSum=statfunction(plotdat[current_tuple[0]])
                ## If the effect size is positive, shift the right axis up.
                if es>0:
                    rightmin=ax_raw.get_ylim()[0]-es
                    rightmax=ax_raw.get_ylim()[1]-es
                ## If the effect size is negative, shift the right axis down.
                elif es<0:
                    rightmin=ax_raw.get_ylim()[0]+es
                    rightmax=ax_raw.get_ylim()[1]+es
                ax_contrast.set_ylim(rightmin, rightmax)

                if gsIdx>0:
                    ax_contrast.set_ylabel('')
                align_yaxis(ax_raw, refSum, ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)

                if paired is False:
                    # Set xlims so everything is properly visible!
                    swarm_xbounds=ax_raw.get_xbound()
                    ax_contrast.set_xbound(swarm_xbounds[0] -(summaryLineWidth * 1.1), 
                        swarm_xbounds[1] + (summaryLineWidth * 1.1))
                else:
                    ax_contrast.set_xlim(-0.05,0.25+violinWidth)

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(
                bslist=bscontrast,
                ax=ax_contrast,
                violinWidth=violinWidth,
                violinOffset=violinOffset,
                markersize=summaryMarkerSize,
                marker=summaryMarkerType,
                linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx>0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList=pd.DataFrame(contrastList).T
    contrastList.columns=contrastListNames

    # Get number of axes in figure for aesthetic tweaks.
    axesCount=len(fig.get_axes())
    for i in range(0, axesCount, 2):
        # Set new tick labels.
        # The tick labels belong to the SWARM axes
        # for both floating and non-floating plots.
        # This is because `sharex` was invoked.
        axx=fig.axes[i]
        newticklabs=list()
        for xticklab in axx.xaxis.get_ticklabels():
            t=xticklab.get_text()
            if paired:
                N=str(counts)
            else:
                N=str(counts.ix[t])

            if showGroupCount:
                newticklabs.append(t+' n='+N)
            else:
                newticklabs.append(t)
            axx.set_xticklabels(
                newticklabs,
                rotation=tickAngle,
                horizontalalignment=tickAlignment)

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx=fig.axes[i]

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if i==0:
            drawback_y(axx)

        if i!=axesCount-2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(False)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)
        else:
            drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(Line2D(
                (axx.xaxis.get_view_interval()[0], 
                    axx.xaxis.get_view_interval()[1]), 
                (0,0),
                color='black', linewidth=0.75
                )
            )
        
        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i==axesCount-2: # the last (rightmost) swarm axes.
                axx.legend(loc='top right',
                    bbox_to_anchor=(1.1,1.0),
                    fontsize=legendFontSize,
                    **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j,i in enumerate(range(1, axesCount, 2)):
        axx=fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright=axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                xmin=xleft-1, 
                xmax=xright+1,
                linestyle=contrastZeroLineStyle,
                linewidth=0.75,
                color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes, only if axesCount is 2.
                    # Not entirely sure why I have to do this.
                    if axesCount==2:
                        drawback_y(axx)

            sns.despine(ax=axx, 
                top=True, right=True, 
                left=False, bottom=False, 
                trim=True)
            if j==0 and axesCount==2:
                # Draw back x-axis lines connecting ticks.
                drawback_x(axx)
            # Rotate tick labels.
            rotateTicks(axx,tickAngle,tickAlignment)

        elif floatContrast is True:
            if paired is True:
                # Get the bootstrapped contrast range.
                lower=np.min(contrastList.ix['stat_array',j])
                upper=np.max(contrastList.ix['stat_array',j])
            else:
                lower=np.min(contrastList.ix['diffarray',j])
                upper=np.max(contrastList.ix['diffarray',j])
            meandiff=contrastList.ix['summary', j]

            ## Make sure we have zero in the limits.
            if lower>0:
                lower=0.
            if upper<0:
                upper=0.

            ## Get the tick interval from the left y-axis.
            leftticks=fig.get_axes()[i-1].get_yticks()
            tickstep=leftticks[1] -leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1=axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2=list()
            for a,b in enumerate(newticks1):
                if (b >= lower and b <= upper):
                    # if the tick lies within upper and lower, take it.
                    newticks2.append(b)
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2)<meandiff:
                ind=np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1.
                newticks2.append( newticks1[ind+1] )
            elif meandiff<np.min(newticks2):
                ind=np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1.
                newticks2.append( newticks1[ind-1] )
            newticks2=np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))
            
            ## Despine the axes.
            sns.despine(ax=axx, trim=True, 
                bottom=False, right=False,
                left=True, top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots.
    if (axesCount>2 and 
        contrastShareY is True and 
        floatContrast is False):

        # Set contrast ylim as max ticks of leftmost swarm axes.
        if contrastYlim is None:
            lower=list()
            upper=list()
            for c in range(0,len(contrastList.columns)):
                lower.append( np.min(contrastList.ix['bca_ci_low',c]) )
                upper.append( np.max(contrastList.ix['bca_ci_high',c]) )
            lower=np.min(lower)
            upper=np.max(upper)
        else:
            lower=contrastYlim[0]
            upper=contrastYlim[1]

        normalizeContrastY(fig, 
            contrast_ylim = contrastYlim, 
            show_all_yaxes = showAllYAxes)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)

    else:    
        # Tight Layout!
        gsMain.tight_layout(fig)
    
    # And we're all done.
    rcdefaults() # restore matplotlib defaults.
    sns.set() # restore seaborn defaults.
    return fig, contrastList
# Show any figure built earlier in the script (that code is outside this excerpt).
plt.show()

# --- Scatterplot matrix -------------------------------------------------------
# Pairwise plots of a handful of columns, coloured by the `category` column.
pairplot_columns = [
    'alcohol',
    'color_intensity',
    'malic_acid',
    'magnesium',
    'category',
]
fig = sns.pairplot(data=data[pairplot_columns], hue='category')
plt.show()

# --- Bee swarm plot -----------------------------------------------------------
# `total_phenols` values grouped along the x-axis by `category`.
sns.swarmplot(data=data, x='category', y='total_phenols')
plt.show()

# --- Empirical cumulative distribution function (ECDF) ------------------------
# Sorted `hue` values go on the x-axis; the y-axis is i / n for the i-th
# sorted value, i.e. the fraction of observations at or below that value.
x = np.sort(data['hue'])
n_points = len(x)
y = np.arange(1, n_points + 1, dtype='float32') / n_points

# Markers only: an empty linestyle suppresses the connecting line.
plt.plot(x, y, linestyle='', marker='o')
plt.ylabel('ECDF')
Ejemplo n.º 9
0
def contrastplot(data,
                 x=None,
                 y=None,
                 idx=None,
                 idcol=None,
                 alpha=0.75,
                 axis_title_size=None,
                 ci=95,
                 contrastShareY=True,
                 contrastEffectSizeLineStyle='solid',
                 contrastEffectSizeLineColor='black',
                 contrastYlim=None,
                 contrastZeroLineStyle='solid',
                 contrastZeroLineColor='black',
                 connectPairs=True,
                 effectSizeYLabel="Effect Size",
                 figsize=None,
                 floatContrast=True,
                 floatSwarmSpacer=0.2,
                 heightRatio=(1, 1),
                 lineWidth=2,
                 legend=True,
                 legendFontSize=14,
                 legendFontProps={},
                 paired=False,
                 pairedDeltaLineAlpha=0.3,
                 pairedDeltaLineWidth=1.2,
                 pal=None,
                 rawMarkerSize=8,
                 rawMarkerType='o',
                 reps=3000,
                 showGroupCount=True,
                 showCI=False,
                 showAllYAxes=False,
                 showRawData=True,
                 smoothboot=False,
                 statfunction=None,
                 summaryBar=False,
                 summaryBarColor='grey',
                 summaryBarAlpha=0.25,
                 summaryColour='black',
                 summaryLine=True,
                 summaryLineStyle='solid',
                 summaryLineWidth=0.25,
                 summaryMarkerSize=10,
                 summaryMarkerType='o',
                 swarmShareY=True,
                 swarmYlim=None,
                 tickAngle=45,
                 tickAlignment='right',
                 violinOffset=0.375,
                 violinWidth=0.2,
                 violinColor='k',
                 xticksize=None,
                 yticksize=None,
                 **kwargs):
    '''Takes a pandas DataFrame and produces a contrast plot:
    either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot.
    Paired and unpaired options available.

    Keyword arguments:
        data: pandas DataFrame
            
        x: string
            column name containing categories to be plotted on the x-axis.

        y: string
            column name containing values to be plotted on the y-axis.

        idx: tuple
            flexible declaration of groupwise comparisons.

        idcol: string
            for paired plots.

        alpha: float
            alpha (transparency) of raw swarmed data points.
            
        axis_title_size=None
        ci=95
        contrastShareY=True
        contrastEffectSizeLineStyle='solid'
        contrastEffectSizeLineColor='black'
        contrastYlim=None
        contrastZeroLineStyle='solid'
        contrastZeroLineColor='black'
        effectSizeYLabel="Effect Size"
        figsize=None
        floatContrast=True
        floatSwarmSpacer=0.2
        heightRatio=(1,1)
        lineWidth=2
        legend=True
        legendFontSize=14
        legendFontProps={}
        paired=False
        pairedDeltaLineAlpha=0.3
        pairedDeltaLineWidth=1.2
        pal=None
        rawMarkerSize=8
        rawMarkerType='o'
        reps=3000
        showGroupCount=True
        showCI=False
        showAllYAxes=False
        showRawData=True
        smoothboot=False
        statfunction=None
        summaryBar=False
        summaryBarColor='grey'
        summaryBarAlpha=0.25
        summaryColour='black'
        summaryLine=True
        summaryLineStyle='solid'
        summaryLineWidth=0.25
        summaryMarkerSize=10
        summaryMarkerType='o'
        swarmShareY=True
        swarmYlim=None
        tickAngle=45
        tickAlignment='right'
        violinOffset=0.375
        violinWidth=0.2
        violinColor='k'
        xticksize=None
        yticksize=None

    Returns:
        A matplotlib Figure.
        Organization of figure Axes.
    '''

    # Check that `data` is a pandas dataframe
    if 'DataFrame' not in str(type(data)):
        raise TypeError(
            "The object passed to the command is not not a pandas DataFrame.\
         Please convert it to a pandas DataFrame.")

    # make sure that at least one of x, y, and idx is specified.
    if x is None and y is None and idx is None:
        raise ValueError(
            'You need to specify `x` and `y`, or `idx`. Neither has been specifed.'
        )

    if x is None:
        # if x is not specified, assume this is a 'wide' dataset, with each idx being the name of a column.
        datatype = 'wide'
        # Check that the idx are legit columns.
        all_idx = np.unique([element for tupl in idx for element in tupl])
        # # melt the data.
        # data=pd.melt(data,value_vars=all_idx)
        # x='variable'
        # y='value'
    else:
        # if x is specified, assume this is a 'long' dataset with each row corresponding to one datapoint.
        datatype = 'long'
        # make sure y is not none.
        if y is None:
            raise ValueError("`paired` is false, but no y-column given.")
        # Calculate Ns.
        counts = data.groupby(x)[y].count()

    # Get and set levels of data[x]
    if paired is True:
        violinWidth = 0.1
        # # Calculate Ns--which should be simply the number of rows in data.
        # counts=len(data)
        # is idcol supplied?
        if idcol is None and datatype == 'long':
            raise ValueError(
                '`idcol` has not been supplied but a paired plot is desired; please specify the `idcol`.'
            )
        if idx is not None:
            # check if multi-plot or not
            if all(isinstance(element, str) for element in idx):
                # check that every idx is a column name.
                idx_not_in_cols = [n for n in idx if n not in data[x].unique()]
                if len(idx_not_in_cols) != 0:
                    raise ValueError(
                        str(idx_not_in_cols) +
                        " cannot be found in the columns of `data`.")
                # data_wide_cols=[n for n in idx if n in data.columns]
                # if idx is supplied but not a multiplot (ie single list or tuple)
                if len(idx) != 2:
                    raise ValueError(idx + " does not have length 2.")
                else:
                    tuple_in = (tuple(idx, ), )
                widthratio = [1]
            elif all(isinstance(element, tuple) for element in idx):
                # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
                idx_not_in_cols = [
                    n for tup in idx for n in tup if n not in data[x].unique()
                ]
                if len(idx_not_in_cols) != 0:
                    raise ValueError(
                        str(idx_not_in_cols) +
                        " cannot be found in the column " + x)
                # data_wide_cols=[n for tup in idx for n in tup if n in data.columns]
                if (any(len(element) != 2 for element in idx)):
                    # If any of the tuples does not contain exactly 2 elements.
                    raise ValueError(element + " does not have length 2.")
                # Make sure the widthratio of the separate multiplot corresponds to how
                # many groups there are in each one.
                tuple_in = idx
                widthratio = []
                for i in tuple_in:
                    widthratio.append(len(i))
        elif idx is None:
            raise ValueError('Please specify idx.')
        showRawData = False  # Just show lines, do not show data.
        showCI = False  # wait till I figure out how to plot this for sns.barplot.
        if datatype == 'long':
            if idx is None:
                ## If `idx` is not specified, just take the FIRST TWO levels alphabetically.
                tuple_in = tuple(np.sort(np.unique(data[x]))[0:2], )
            # pivot the dataframe if it is long!
            data_pivot = data.pivot_table(index=idcol, columns=x, values=y)

    elif paired is False:
        if idx is None:
            widthratio = [1]
            tuple_in = (tuple(data[x].unique()), )
            if len(tuple_in[0]) > 2:
                floatContrast = False
        else:
            if all(isinstance(element, str) for element in idx):
                # if idx is supplied but not a multiplot (ie single list or tuple)
                # check all every idx specified can be found in data[x]
                idx_not_in_x = [n for n in idx if n not in data[x].unique()]
                if len(idx_not_in_x) != 0:
                    raise ValueError(
                        str(idx_not_in_x) + " cannot be found in the column " +
                        x)
                tuple_in = (idx, )
                widthratio = [1]
                if len(idx) > 2:
                    floatContrast = False
            elif all(isinstance(element, tuple) for element in idx):
                # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
                idx_not_in_x = [
                    n for tup in idx for n in tup if n not in data[x].unique()
                ]
                if len(idx_not_in_x) != 0:
                    raise ValueError(
                        str(idx_not_in_x) + " cannot be found in the column " +
                        x)
                tuple_in = idx

                if (any(len(element) > 2 for element in tuple_in)):
                    # if any of the tuples in idx has more than 2 groups, we set floatContrast to False.
                    floatContrast = False
                # Make sure the widthratio of the separate multiplot corresponds to how
                # many groups there are in each one.
                widthratio = []
                for i in tuple_in:
                    widthratio.append(len(i))
            else:
                raise TypeError(
                    "The object passed to `idx` consists of a mixture of single strings and tuples. \
                    Please make sure that `idx` is either a tuple of column names, or a tuple of tuples, for plotting."
                )

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar = True
        summaryLine = False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine = False
    # initialise statfunction
    if statfunction == None:
        statfunction = np.mean
    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol = kwargs['hue']
            if colorCol not in data.columns:
                raise ValueError(colorCol + ' is not a column name.')
            colGrps = data[colorCol].unique()  #.tolist()
            plotPal = dict(
                zip(colGrps, sns.color_palette(n_colors=len(colGrps))))
        else:
            if datatype == 'long':
                colGrps = data[x].unique()  #.tolist()
                plotPal = dict(
                    zip(colGrps, sns.color_palette(n_colors=len(colGrps))))
            if datatype == 'wide':
                plotPal = np.repeat('k', len(data))
    else:
        if datatype == 'long':
            plotPal = pal
        if datatype == 'wide':
            plotPal = list(map(lambda x: pal[x], data[hue]))

    if swarmYlim is None:
        # get range of _selected groups_.
        # u = list()
        # for t in tuple_in:
        #     for i in np.unique(t):
        #         u.append(i)
        # u = np.unique(u)
        u = np.unique([element for tupl in tuple_in for element in tupl])
        if datatype == 'long':
            tempdat = data[data[x].isin(u)]
            swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])])
        if datatype == 'wide':
            allMin = list()
            allMax = list()
            for col in u:
                allMin.append(np.min(data[col]))
                allMax.append(np.max(data[col]))
            swarm_ylim = np.array([np.min(allMin), np.max(allMax)])
        swarm_ylim = np.round(swarm_ylim)
    else:
        swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]])

    if summaryBar is True:
        lims = swarm_ylim
        # check that 0 lies within the desired limits.
        # if not, extend (upper or lower) limit to zero.
        if 0 not in range(int(round(lims[0])), int(round(
                lims[1]))):  # turn swarm_ylim to integer range.
            # check if all negative:.
            if lims[0] < 0. and lims[1] < 0.:
                swarm_ylim = np.array([np.min(lims), 0.])
            # check if all positive.
            elif lims[0] > 0. and lims[1] > 0.:
                swarm_ylim = np.array([0., np.max(lims)])

    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])

    # plot params
    if axis_title_size is None:
        axis_title_size = 27
    if yticksize is None:
        yticksize = 22
    if xticksize is None:
        xticksize = 22

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams = {'labelsize': axis_title_size}
    xtickParams = {'labelsize': xticksize}
    ytickParams = {'labelsize': yticksize}
    svgParams = {'fonttype': 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams)

    if figsize is None:
        if len(tuple_in) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (8, (8 / np.sqrt(2)))

    # calculate CI.
    if ci < 0 or ci > 100:
        raise ValueError('ci should be between 0 and 100.')
    alpha_level = (100. - ci) / 100.

    # Initialise figure, taking into account desired figsize.
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain = gridspec.GridSpec(
        1,
        np.shape(tuple_in)[0],
        # 1 row; columns based on number of tuples in tuple.
        width_ratios=widthratio,
        wspace=0)

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        if datatype == 'long':
            plotdat = data[data[x].isin(current_tuple)]
            plotdat[x] = plotdat[x].astype("category")
            plotdat[x].cat.set_categories(current_tuple,
                                          ordered=True,
                                          inplace=True)
            plotdat.sort_values(by=[x])
            # # Drop all nans.
            # plotdat.dropna(inplace=True)
            summaries = plotdat.groupby(x)[y].apply(statfunction)
        if datatype == 'wide':
            plotdat = data[list(current_tuple)]
            summaries = statfunction(plotdat)
            plotdat = pd.melt(plotdat)  ##### NOW I HAVE MELTED THE WIDE DATA.

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot.
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            # Create subGridSpec with 2 rows and 1 column.
            subGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx], wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig,
                                      subGridSpec[1, 0],
                                      sharex=ax_raw,
                                      frame_on=False)
        # Calculate the boostrapped contrast
        bscontrast = list()
        if paired is False:
            tempplotdat = plotdat[[
                x, y
            ]]  # only select the columns used for x and y plotting.
            for i in range(1, len(current_tuple)):
                # Note that you start from one. No need to do auto-contrast!
                # if datatype=='long':aas
                tempbs = bootstrap_contrast(
                    data=tempplotdat.dropna(),
                    x=x,
                    y=y,
                    idx=[current_tuple[0], current_tuple[i]],
                    statfunction=statfunction,
                    smoothboot=smoothboot,
                    alpha_level=alpha_level,
                    reps=reps)
                bscontrast.append(tempbs)
                contrastList.append(tempbs)
                contrastListNames.append(current_tuple[i] + ' vs. ' +
                                         current_tuple[0])

        #### PLOT RAW DATA.
        ax_raw.set_ylim(swarm_ylim)
        # ax_raw.yaxis.set_major_locator(MaxNLocator(n_bins='auto'))
        # ax_raw.yaxis.set_major_locator(LinearLocator())
        if paired is False and showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            sw = sns.swarmplot(data=plotdat,
                               x=x,
                               y=y,
                               order=current_tuple,
                               ax=ax_raw,
                               alpha=alpha,
                               palette=plotPal,
                               size=rawMarkerSize,
                               marker=rawMarkerType,
                               **kwargs)

            if floatContrast:
                # Get horizontal offset values.
                maxXBefore = max(sw.collections[0].get_offsets().T[0])
                minXAfter = min(sw.collections[1].get_offsets().T[0])
                xposAfter = maxXBefore + floatSwarmSpacer
                xAfterShift = minXAfter - xposAfter
                # shift the (second) swarmplot
                offsetSwarmX(sw.collections[1], -xAfterShift)
                # shift the tick.
                ax_raw.set_xticks([0., 1 - xAfterShift])

        elif paired is True:
            if showRawData is True:
                sw = sns.swarmplot(data=plotdat,
                                   x=x,
                                   y=y,
                                   order=current_tuple,
                                   ax=ax_raw,
                                   alpha=alpha,
                                   palette=plotPal,
                                   size=rawMarkerSize,
                                   marker=rawMarkerType,
                                   **kwargs)
            if connectPairs is True:
                # Produce paired plot with lines.
                before = plotdat[plotdat[x] == current_tuple[0]][y].tolist()
                after = plotdat[plotdat[x] == current_tuple[1]][y].tolist()
                linedf = pd.DataFrame({'before': before, 'after': after})
                # to get color, need to loop thru each line and plot individually.
                for ii in range(0, len(linedf)):
                    ax_raw.plot(
                        [0, 0.25],
                        [linedf.loc[ii, 'before'], linedf.loc[ii, 'after']],
                        linestyle='solid',
                        linewidth=pairedDeltaLineWidth,
                        color=plotPal[current_tuple[0]],
                        alpha=pairedDeltaLineAlpha,
                    )
                ax_raw.set_xlim(-0.25, 0.5)
                ax_raw.set_xticks([0, 0.25])
                ax_raw.set_xticklabels([current_tuple[0], current_tuple[1]])

        # if swarmYlim is None:
        #     # if swarmYlim was not specified, tweak the y-axis
        #     # to show all the data without losing ticks and range.
        #     ## Get all yticks.
        #     axxYTicks=ax_raw.yaxis.get_majorticklocs()
        #     ## Get ytick interval.
        #     YTickInterval=axxYTicks[1]-axxYTicks[0]
        #     ## Get current ylim
        #     currentYlim=ax_raw.get_ylim()
        #     ## Extend ylim by adding a fifth of the tick interval as spacing at both ends.
        #     ax_raw.set_ylim(
        #         currentYlim[0]-(YTickInterval/5),
        #         currentYlim[1]+(YTickInterval/5)
        #         )
        #     ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto'))
        # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto'))
        # ax_raw.yaxis.set_major_locator(LinearLocator())

        if summaryBar is True:
            if paired is False:
                bar_raw = sns.barplot(x=summaries.index.tolist(),
                                      y=summaries.values,
                                      facecolor=summaryBarColor,
                                      ax=ax_raw,
                                      alpha=summaryBarAlpha)
                if floatContrast is True:
                    maxSwarmSpan = 2 / 10.
                    xlocs = list()
                    for i, bar in enumerate(bar_raw.patches):
                        x_width = bar.get_x()
                        width = bar.get_width()
                        centre = x_width + (width / 2.)
                        if i == 0:
                            bar.set_x(centre - maxSwarmSpan / 2.)
                            xlocs.append(centre)
                        else:
                            bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                            xlocs.append(centre - xAfterShift)
                        bar.set_width(maxSwarmSpan)
                    ax_raw.set_xticks(
                        xlocs)  # make sure xticklocs match the barplot.
                elif floatContrast is False:
                    maxSwarmSpan = 4 / 10.
                    xpos = ax_raw.xaxis.get_majorticklocs()
                    for i, bar in enumerate(bar_raw.patches):
                        bar.set_x(xpos[i] - maxSwarmSpan / 2.)
                        bar.set_width(maxSwarmSpan)
            else:
                # if paired is true
                ax_raw.bar([0, 0.25], [
                    statfunction(plotdat[current_tuple[0]]),
                    statfunction(plotdat[current_tuple[1]])
                ],
                           color=summaryBarColor,
                           alpha=0.5,
                           width=0.05)
                ## Draw zero reference line.
                ax_raw.add_artist(
                    Line2D((ax_raw.xaxis.get_view_interval()[0],
                            ax_raw.xaxis.get_view_interval()[1]), (0, 0),
                           color='k',
                           linewidth=1.25))

        if summaryLine is True:
            if paired is True:
                xdelta = 0
            else:
                xdelta = summaryLineWidth
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i - xdelta, i + xdelta),  # x-coordinates
                    (m, m),
                    color=summaryColour,
                    linestyle=summaryLineStyle)

        if showCI is True:
            sns.barplot(data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple) == 2:
            if paired is False:
                # Plot the CIs on the contrast axes.
                plotbootstrap(sw.collections[1],
                              bslist=tempbs,
                              ax=ax_contrast,
                              violinWidth=violinWidth,
                              violinOffset=violinOffset,
                              markersize=summaryMarkerSize,
                              marker=summaryMarkerType,
                              offset=floatContrast,
                              color=violinColor,
                              linewidth=1)
            else:
                bootsDelta = bootstrap(plotdat[current_tuple[1]] -
                                       plotdat[current_tuple[0]],
                                       statfunction=statfunction,
                                       smoothboot=smoothboot,
                                       alpha_level=alpha_level,
                                       reps=reps)
                contrastList.append(bootsDelta)
                contrastListNames.append(current_tuple[1] + ' vs. ' +
                                         current_tuple[0])
                summDelta = bootsDelta['summary']
                lowDelta = bootsDelta['bca_ci_low']
                highDelta = bootsDelta['bca_ci_high']

                if floatContrast:
                    xpos = 0.375
                else:
                    xpos = 0.25

                # Plot the summary measure.
                ax_contrast.plot(xpos,
                                 bootsDelta['summary'],
                                 marker=summaryMarkerType,
                                 markerfacecolor='k',
                                 markersize=summaryMarkerSize,
                                 alpha=0.75)
                # Plot the CI.
                ax_contrast.plot(
                    [xpos, xpos],
                    [lowDelta, highDelta],
                    color='k',
                    alpha=0.75,
                    # linewidth=1,
                    linestyle='solid')

                # Plot the violin-plot.
                v = ax_contrast.violinplot(bootsDelta['stat_array'], [xpos],
                                           widths=violinWidth,
                                           showextrema=False,
                                           showmeans=False)
                halfviolin(v, half='right', color='k')

            if floatContrast:
                # Set reference lines
                if paired is False:
                    ## First get leftmost limit of left reference group
                    xtemp, _ = np.array(sw.collections[0].get_offsets()).T
                    leftxlim = xtemp.min()
                    ## Then get leftmost limit of right test group
                    xtemp, _ = np.array(sw.collections[1].get_offsets()).T
                    rightxlim = xtemp.min()
                    ref = tempbs['summary']
                else:
                    leftxlim = 0
                    rightxlim = 0.25
                    ref = bootsDelta['summary']
                    ax_contrast.set_xlim(-0.25, 0.5)  # does this work?

                ## zero line
                ax_contrast.hlines(
                    0,  # y-coordinates
                    leftxlim,
                    3.5,  # x-coordinates, start and end.
                    linestyle=contrastZeroLineStyle,
                    linewidth=1,
                    color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(
                    ref,
                    rightxlim,
                    3.5,  # x-coordinates, start and end.
                    linestyle=contrastEffectSizeLineStyle,
                    linewidth=1,
                    color=contrastEffectSizeLineColor)

                if paired is False:
                    es = float(tempbs['summary'])
                    refSum = tempbs['statistic_ref']
                else:
                    es = float(bootsDelta['summary'])
                    refSum = statfunction(plotdat[current_tuple[0]])
                ## If the effect size is positive, shift the right axis up.
                if es > 0:
                    rightmin = ax_raw.get_ylim()[0] - es
                    rightmax = ax_raw.get_ylim()[1] - es
                ## If the effect size is negative, shift the right axis down.
                elif es < 0:
                    rightmin = ax_raw.get_ylim()[0] + es
                    rightmax = ax_raw.get_ylim()[1] + es
                ax_contrast.set_ylim(rightmin, rightmax)

                if gsIdx > 0:
                    ax_contrast.set_ylabel('')
                align_yaxis(ax_raw, refSum, ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)

                if paired is False:
                    # Set xlims so everything is properly visible!
                    swarm_xbounds = ax_raw.get_xbound()
                    ax_contrast.set_xbound(
                        swarm_xbounds[0] - (summaryLineWidth * 1.1),
                        swarm_xbounds[1] + (summaryLineWidth * 1.1))
                else:
                    ax_contrast.set_xlim(-0.05, 0.25 + violinWidth)

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(bslist=bscontrast,
                                   ax=ax_contrast,
                                   violinWidth=violinWidth,
                                   violinOffset=violinOffset,
                                   markersize=summaryMarkerSize,
                                   marker=summaryMarkerType,
                                   linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx > 0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Get number of axes in figure for aesthetic tweaks.
    axesCount = len(fig.get_axes())
    for i in range(0, axesCount, 2):
        # Set new tick labels.
        # The tick labels belong to the SWARM axes
        # for both floating and non-floating plots.
        # This is because `sharex` was invoked.
        axx = fig.axes[i]
        newticklabs = list()
        for xticklab in axx.xaxis.get_ticklabels():
            t = xticklab.get_text()
            if paired:
                N = str(counts)
            else:
                N = str(counts.ix[t])

            if showGroupCount:
                newticklabs.append(t + ' n=' + N)
            else:
                newticklabs.append(t)
            axx.set_xticklabels(newticklabs,
                                rotation=tickAngle,
                                horizontalalignment=tickAlignment)

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx = fig.axes[i]

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if i == 0:
            drawback_y(axx)

        if i != axesCount - 2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(False)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)
        else:
            drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(
                Line2D((axx.xaxis.get_view_interval()[0],
                        axx.xaxis.get_view_interval()[1]), (0, 0),
                       color='black',
                       linewidth=0.75))

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i == axesCount - 2:  # the last (rightmost) swarm axes.
                axx.legend(loc='top right',
                           bbox_to_anchor=(1.1, 1.0),
                           fontsize=legendFontSize,
                           **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j, i in enumerate(range(1, axesCount, 2)):
        axx = fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright = axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                       xmin=xleft - 1,
                       xmax=xright + 1,
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes, only is axesCount is 2.
                    # Not entirely sure why I have to do this.
                    if axesCount == 2:
                        drawback_y(axx)

            sns.despine(ax=axx,
                        top=True,
                        right=True,
                        left=False,
                        bottom=False,
                        trim=True)
            if j == 0 and axesCount == 2:
                # Draw back x-axis lines connecting ticks.
                drawback_x(axx)
            # Rotate tick labels.
            rotateTicks(axx, tickAngle, tickAlignment)

        elif floatContrast is True:
            if paired is True:
                # Get the bootstrapped contrast range.
                lower = np.min(contrastList.ix['stat_array', j])
                upper = np.max(contrastList.ix['stat_array', j])
            else:
                lower = np.min(contrastList.ix['diffarray', j])
                upper = np.max(contrastList.ix['diffarray', j])
            meandiff = contrastList.ix['summary', j]

            ## Make sure we have zero in the limits.
            if lower > 0:
                lower = 0.
            if upper < 0:
                upper = 0.

            ## Get the tick interval from the left y-axis.
            leftticks = fig.get_axes()[i - 1].get_yticks()
            tickstep = leftticks[1] - leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1 = axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = list()
            for a, b in enumerate(newticks1):
                if (b >= lower and b <= upper):
                    # if the tick lies within upper and lower, take it.
                    newticks2.append(b)
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2) < meandiff:
                ind = np.where(newticks1 == np.max(newticks2))[0][
                    0]  # find out the max tick index in newticks1.
                newticks2.append(newticks1[ind + 1])
            elif meandiff < np.min(newticks2):
                ind = np.where(newticks1 == np.min(newticks2))[0][
                    0]  # find out the min tick index in newticks1.
                newticks2.append(newticks1[ind - 1])
            newticks2 = np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine the axes.
            sns.despine(ax=axx,
                        trim=True,
                        bottom=False,
                        right=False,
                        left=True,
                        top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots.
    if (axesCount > 2 and contrastShareY is True and floatContrast is False):

        # Set contrast ylim as max ticks of leftmost swarm axes.
        if contrastYlim is None:
            lower = list()
            upper = list()
            for c in range(0, len(contrastList.columns)):
                lower.append(np.min(contrastList.ix['bca_ci_low', c]))
                upper.append(np.max(contrastList.ix['bca_ci_high', c]))
            lower = np.min(lower)
            upper = np.max(upper)
        else:
            lower = contrastYlim[0]
            upper = contrastYlim[1]

        normalizeContrastY(fig,
                           contrast_ylim=contrastYlim,
                           show_all_yaxes=showAllYAxes)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)

    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're all done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList
def swarmsummary(data,
                 x,
                 y,
                 idx=None,
                 statfunction=None,
                 violinOffset=0.1,
                 violinWidth=0.2,
                 figsize=(7, 7),
                 legend=True,
                 smoothboot=False,
                 rawMarkerSize=10,
                 summaryMarkerSize=12,
                 rawMarkerType='o',
                 summaryMarkerType='o',
                 **kwargs):
    """Draw a swarm plot of `y` per level of `x` with a bootstrap summary
    overlaid on each group.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-form table holding the columns named by `x` and `y`. It is
        only read, never modified.
    x, y : str
        Column names of the grouping variable and of the plotted values.
    idx : sequence, optional
        Levels of `data[x]` to plot, in plotting order. When None, the
        levels are taken in order of first appearance in `data[x]`.
    statfunction : callable, optional
        Summary statistic handed to `bootstrap`; defaults to `np.mean`.
    violinOffset, violinWidth : float
        Forwarded unchanged to `plotbootstrap`.
    figsize : tuple
        Figure size passed to `plt.subplots`.
    legend : bool
        True places a legend to the right of the axes; False hides it.
    smoothboot : bool
        Forwarded unchanged to `bootstrap`.
    rawMarkerSize, rawMarkerType
        `size` and `marker` of the swarm points.
    summaryMarkerSize, summaryMarkerType
        `markersize` and `marker` forwarded to `plotbootstrap`.
    **kwargs
        Extra keyword arguments passed to `sns.swarmplot`.

    Returns
    -------
    fig : matplotlib figure
        The figure created for the plot.
    pandas.DataFrame
        Built with `DataFrame.from_dict` from the per-level `bootstrap`
        results, keyed by level.
    """
    df = data  # alias only; the raw data is not re-ordered or modified.
    if statfunction is None:
        statfunction = np.mean

    if idx is None:
        # DO NOT USE numpy.unique() here: it will not preserve the order
        # of appearance of the levels.
        levs = df[x].unique()
    else:
        levs = idx

    # One bootstrap result per level, kept in plotting order.
    bslist = OrderedDict()
    for lev in levs:
        level_values = df.loc[df[x] == lev][y]
        bslist[lev] = bootstrap(level_values,
                                statfunction=statfunction,
                                smoothboot=smoothboot)

    # Initialise figure; swarmplot draws on the axes just created.
    fig, ax = plt.subplots(figsize=figsize)
    sw = sns.swarmplot(data=df,
                       x=x,
                       y=y,
                       order=levs,
                       size=rawMarkerSize,
                       marker=rawMarkerType,
                       **kwargs)

    swarm_y = list()
    for i, bs in enumerate(bslist.values()):
        # The i-th collection of the swarm axes is paired with the i-th level.
        plotbootstrap(sw.collections[i],
                      bslist=bs,
                      ax=ax,
                      violinWidth=violinWidth,
                      violinOffset=violinOffset,
                      marker=summaryMarkerType,
                      markersize=summaryMarkerSize,
                      color='k',
                      linewidth=2)

        # Collect the plotted y-positions (a separate name is used so the
        # `y` column-name parameter is not overwritten).
        _, y_offsets = np.array(sw.collections[i].get_offsets()).T
        swarm_y.append(y_offsets)

    # Pad the y-range by 10% of each extreme's magnitude. Subtracting or
    # adding abs() always widens the range; plain 0.9*min / 1.1*max would
    # move the limits inwards and clip points when the data are negative.
    swarm_y = np.concatenate(swarm_y)
    y_low = swarm_y.min()
    y_high = swarm_y.max()
    ax.set_ylim(y_low - 0.1 * abs(y_low), y_high + 0.1 * abs(y_high))

    if legend is True:
        ax.legend(loc='center left', bbox_to_anchor=(1.1, 1))
    elif legend is False:
        ax.legend().set_visible(False)

    sns.despine(ax=ax, trim=True)

    return fig, pd.DataFrame.from_dict(bslist)
Ejemplo n.º 11
0
def expandable_ttest(
    df,
    colorset=QUALITATIVE_COLORSET,
    compare="Treatment",
    comparisons={"Period [days]": []},
    datacolumn_label="Sucrose Preference Ratio",
    legend_loc="best",
    rename_treatments={},
    bp_style=True,
    save_as=False,
):
    """High-level interface for plotting of one or multiple related t-tests.

    Parameters
    ----------

    df : {pandas.Dataframe, string}
        Pandas Dataframe containing the experimental data, or path pointing
        to a csv containing such data. A DataFrame passed in is not modified.

    colorset : optional
        Passed to `sns.color_palette()` to build the swarmplot palette.

    compare : string, optional
        Which parameter to categorize the comparison by. Must be a column
        name from df.

    comparisons : dict, optional
        A dictionary, the key of which indicates which df column to generate
        comparison instances from. If only a subset of the available rows are
        to be included in the comparison, the dictionary needs to specify a
        value, consisting of a list of acceptable values on the column given
        by the key. Only the first key/value pair is used. The dictionary is
        read, never modified.

    datacolumn_label : string, optional
        A column name from df, the values in which column give the data to
        plot.

    legend_loc : string, optional
        Where to place the legend on the figure.

    rename_treatments : dict, optional
        Dictionary with strings as keys and values used to map treatment
        names onto new strings. The renaming always operates on the
        "Treatment" column, independently of `compare`.

    bp_style : bool, optional
        Whether to apply the default behaviopy style.

    save_as : string, optional
        If given, path the figure is saved to (user-expanded and made
        absolute before saving).

    Notes
    -----

    Seaborn's `sns.swarmplot()` does not read rcParams by itself, so we need
    to pass it `size=rcParams['lines.markersize']` to correctly set the
    marker size.
    """

    # `basestring` only exists on Python 2; fall back to `str` on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(df, string_types):
        df = pd.read_csv(path.abspath(path.expanduser(df)))

    comparison_instances_label = list(comparisons.keys())[0]
    comparison_instances = list(comparisons.values())[0]
    if comparison_instances:
        # Accept a single value as well as a list of acceptable values.
        if (isinstance(comparison_instances, string_types)
                or not hasattr(comparison_instances, "__iter__")):
            comparison_instances = [comparison_instances]
        # Keep only the requested rows. The result has to be assigned back,
        # otherwise the selection is computed and thrown away.
        df = df[df[comparison_instances_label].isin(comparison_instances)]

    if rename_treatments:
        # Work on a copy so the caller's DataFrame keeps its original names.
        df = df.copy()
        for key in rename_treatments:
            df.loc[df["Treatment"] == key,
                   "Treatment"] = rename_treatments[key]
        df = control_first_reordering(df, "Treatment")

    if bp_style:
        sns.set_style("white", {'legend.frameon': True})
        plt.style.use(u'seaborn-darkgrid')
        plt.style.use(u'ggplot')

    sns.swarmplot(
        x=comparison_instances_label,
        y=datacolumn_label,
        hue=compare,
        data=df,
        palette=sns.color_palette(colorset),
        split=True,
        size=rcParams['lines.markersize'],
    )
    plt.legend(loc=legend_loc, frameon=True)

    add_significance(df,
                     datacolumn_label,
                     compare=compare,
                     over=comparison_instances_label)

    if save_as:
        plt.savefig(path.abspath(path.expanduser(save_as)),
                    bbox_inches='tight')
Ejemplo n.º 12
0
def do_movement():
    """Draw the movement figures from the module-level `analyses` table.

    Seven figures are produced in this order: "movement", "movement_x",
    "movement_y", "movement_back", "movement_runs", "movement_x_runs" and
    "movement_y_runs". Every figure is drawn inside the `figure(...)`
    context manager, which is defined elsewhere in this file.
    """
    id_columns = ["user", "experiment", "order", "group"]

    # (column in `analyses`, panel title) for each three-panel figure.
    path_panels = (("path_length", "Path length"),
                   ("move_x", "Movement in $x$"),
                   ("move_y", "Movement in $y$"))
    x_panels = (("move_l", "Movement left"),
                ("move_r", "Movement right"),
                ("move_x", "Movement in $x$"))
    y_panels = (("move_b", "Movement backwards"),
                ("move_f", "Movement forwards"),
                ("move_y", "Movement in $y$"))

    def draw_facets(name, panels, facet_opts, xlabel=None, **figure_opts):
        """Melt the panel columns and draw one factorplot column per panel."""
        columns = [column for column, _ in panels]
        with figure(name, **figure_opts):
            long_form = pd.melt(analyses,
                                id_vars=id_columns,
                                value_vars=columns)
            grid = sns.factorplot(y="value", col="variable",
                                  data=long_form, **facet_opts)
            axes = grid.fig.axes
            for position, (_, title) in enumerate(panels):
                axes[position].set_title(title)
            axes[0].set_ylabel("distance (m)")
            if xlabel is not None:
                for position in range(len(panels)):
                    axes[position].set_xlabel(xlabel)
            # Anchor the lower y-limit at zero, keep the upper one.
            plt.ylim(0, plt.ylim()[1])

    # Box plots per experiment.
    box_opts = dict(x="experiment", kind="box")
    draw_facets("movement", path_panels, box_opts,
                figsize=fig_size(0.9, 0.4))
    draw_facets("movement_x", x_panels, box_opts)
    draw_facets("movement_y", y_panels, box_opts)

    # Backwards movement alone: box plot with the raw points on top.
    with figure("movement_back"):
        sns.factorplot(x="experiment", y="move_b", data=analyses, kind="box")
        sns.swarmplot(x="experiment", y="move_b", split=True, data=analyses,
                      palette=cmap_complement)
        plt.ylabel("distance (m)")
        plt.title("Movement backwards")

    # Same measures against run order, one hue per experiment.
    run_opts = dict(x="order", hue="experiment")
    draw_facets("movement_runs", path_panels, dict(run_opts, capsize=0.2),
                xlabel="run", figsize=fig_size(0.9, 0.4))
    draw_facets("movement_x_runs", x_panels, run_opts, xlabel="run")
    draw_facets("movement_y_runs", y_panels, run_opts, xlabel="run")
# Fold the data onto one half: the first columns are mirrored (np.fliplr) and
# averaged with the last columns -- 5 columns for the raw differences, 100 for
# the fitted curves. `fits_x`, `raw_diff` and `fits_diff` are defined outside
# this excerpt.
# NOTE(review): `2 * fits_x[-100:]` doubles the values only if `fits_x` is a
# numpy array (a plain list would be repeated instead) -- confirm.
fits_x_half = 2 * fits_x[-100:]
raw_half = 0.5 * (np.fliplr(raw_diff[:, :5]) + raw_diff[:, -5:])
fits_half = 0.5 * (np.fliplr(fits_diff[:, :100]) + fits_diff[:, -100:])

# %% Plot difference in two conditions
rcParams['font.sans-serif'] = "Arial"

c3 = '#332288'  # colour shared by every marker in this figure
ms = 5  # marker size of the hollow mean markers
# presumably 8.7 cm wide: 8.7 / 2.54 converts centimetres to inches.
plt.figure(figsize=(8.7 / 2.54, 2.5))
# Build a long-form table with one row per plotted value. The three segments
# of `angles` line up with the three segments of `effect`:
#   1. the five tested angles repeated 20 times, matching the row-major
#      raw_half.ravel() (assumes raw_half has 20 rows -- TODO confirm);
#   2. the 23 values 1.25, 2.5, ..., 28.75 paired with None, i.e. levels that
#      carry no data;
#   3. angle 30 repeated 20 times, paired with the per-row means.
# The data-free levels give 24 distinct angles, so when sorted the tested
# angles 1.25, 2.5, 5, 10, 20 and the level 30 sit at positions 0, 1, 3, 7, 15
# and 23 -- the x-positions hard-coded in the plt.plot / plt.xticks calls below.
angles = 20 * [1.25, 2.5, 5, 10, 20]
angles += list(np.arange(1.25, 30, 1.25))
angles += 20 * [30]
effect = list(raw_half.ravel()) + 23 * [None] + list(raw_half.mean(1))
data = DataFrame(data={'angles': angles, 'effect': effect})
# Individual values as small triangles, then a point estimate with capped
# error bars per angle (the first two positional arguments are x and y).
sns.swarmplot('angles', 'effect', data=data, palette=[c3], marker='^', size=3)
sns.pointplot('angles', 'effect', data=data, color=c3, join=False, markers ='^', scale=.1, capsize=.6, errwidth=1)
# Hollow triangles on top (zorder=100): the column means at the tested angles,
# and the mean of those column means at position 23.
plt.plot([0, 1, 3, 7, 15], raw_half.mean(0), marker='^', mfc='w', mec=c3,
         markersize=ms, lw=0, zorder=100)
plt.plot([23], raw_half.mean(0).mean(0), marker='^', mfc='w', mec=c3,
         markersize=ms, lw=0, zorder=100)
# Ticks only where data is drawn. The labels given here are replaced on the
# next line by `xlabels + ['Mean']`; `xlabels` is defined outside this excerpt.
plt.xticks([0, 1, 3, 7, 15, 23], [1.25, 2.5, 5, 10, 20, 25])
plt.gca().set_xticklabels(xlabels + ['Mean'])
plt.xlabel(u'Auditory Separation (°)')
plt.ylabel('Performance Improvement \n (% correct)')

plt.xlim([-1, 25])
plt.tight_layout()
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
# Called after tight_layout(), so these explicit margins are the ones applied.
plt.subplots_adjust(left=0.175, right=1, top=0.95, bottom=0.2)
Ejemplo n.º 14
0
def contrastplot_test(
    data, x, y, idx=None, 
    
    alpha=0.75, 
    axis_title_size=None,

    barWidth=5,

    contrastShareY=True,
    contrastEffectSizeLineStyle='solid',
    contrastEffectSizeLineColor='black',
    contrastYlim=None,
    contrastZeroLineStyle='solid', 
    contrastZeroLineColor='black', 

    effectSizeYLabel="Effect Size", 

    figsize=None, 
    floatContrast=True,
    floatSwarmSpacer=0.2,

    heightRatio=(1, 1),

    idcol=None,

    lineWidth=2,
    legend=True,
    legendFontSize=14,
    legendFontProps={},

    paired=False,
    pal=None, 

    rawMarkerSize=8,
    rawMarkerType='o',
    reps=3000,
    
    showGroupCount=True,
    show95CI=False, 
    showAllYAxes=False,
    showRawData=True,
    smoothboot=False, 
    statfunction=None, 

    summaryBar=False, 
    summaryBarColor='grey',
    summaryBarAlpha=0.25,

    summaryColour='black', 
    summaryLine=True, 
    summaryLineStyle='solid', 
    summaryLineWidth=0.25, 

    summaryMarkerSize=10, 
    summaryMarkerType='o',

    swarmShareY=True, 
    swarmYlim=None, 

    tickAngle=45,
    tickAlignment='right',

    violinOffset=0.375,
    violinWidth=0.2, 
    violinColor='k',

    xticksize=None,
    yticksize=None,

    **kwargs):

    '''Takes a pandas dataframe and produces a contrast plot:
    either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot.
    -----------------------------------------------------------------------
    Description of flags upcoming.'''

    # Check that `data` is a pandas dataframe
    if 'DataFrame' not in str(type(data)):
        raise TypeError("The object passed to the command is not not a pandas DataFrame.\
         Please convert it to a pandas DataFrame.")

    # Get and set levels of data[x]    
    if idx is None:
        widthratio=[1]
        allgrps=np.sort(data[x].unique())
        if paired:
            # If `idx` is not specified, just take the FIRST TWO levels alphabetically.
            tuple_in=tuple(allgrps[0:2],)
        else:
            # No idx is given, so all groups are compared to the first one in the DataFrame column.
            tuple_in=(tuple(allgrps), )
            if len(allgrps)>2:
                floatContrast=False

    else:
        if all(isinstance(element, str) for element in idx):
            # if idx is supplied but not a multiplot (ie single list or tuple) 
            tuple_in=(idx, )
            widthratio=[1]
            if len(idx)>2:
                floatContrast=False
        elif all(isinstance(element, tuple) for element in idx):
            # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot!
            tuple_in=idx
            if ( any(len(element)>2 for element in tuple_in) ):
                # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False.
                floatContrast=False
            # Make sure the widthratio of the seperate multiplot corresponds to how 
            # many groups there are in each one.
            widthratio=[]
            for i in tuple_in:
                widthratio.append(len(i))
        else:
            raise TypeError("The object passed to `idx` consists of a mixture of single strings and tuples. \
                Please make sure that `idx` is either a tuple of column names, or a tuple of tuples for plotting.")

    # initialise statfunction
    if statfunction == None:
        statfunction=np.mean

    # Create list to collect all the contrast DataFrames generated.
    contrastList=list()
    contrastListNames=list()
    # # Calculate the bootstraps according to idx.
    # for ix, current_tuple in enumerate(tuple_in):
    #     bscontrast=list()
    #     for i in range (1, len(current_tuple)):
    #     # Note that you start from one. No need to do auto-contrast!
    #         tempbs=bootstrap_contrast(
    #             data=data,
    #             x=x,
    #             y=y,
    #             idx=[current_tuple[0], current_tuple[i]],
    #             statfunction=statfunction,
    #             smoothboot=smoothboot,
    #             reps=reps)
    #         bscontrast.append(tempbs)
    #         contrastList.append(tempbs)
    #         contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0])

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol=kwargs['hue']
            colGrps=data[colorCol].unique()
            nColors=len(colGrps)
        else:
            colorCol=x
            colGrps=data[x].unique()
            nColors=len([element for tupl in tuple_in for element in tupl])
        plotPal=dict( zip( colGrps, sns.color_palette(n_colors=nColors) ) )
    else:
        plotPal=pal

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar=True
        summaryLine=False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine=False

    if swarmYlim is None:
        # get range of _selected groups_.
        u = list()
        for t in idx:
            for i in np.unique(t):
                u.append(i)
        u = np.unique(u)
        tempdat=data[data[x].isin(u)]
        swarm_ylim=np.array([np.min(tempdat[y]), np.max(tempdat[y])])
    else:
        swarm_ylim=np.array([swarmYlim[0],swarmYlim[1]])

    if contrastYlim is not None:
        contrastYlim=np.array([contrastYlim[0],contrastYlim[1]])

    barWidth=barWidth/1000 # Not sure why have to reduce the barwidth by this much! 
    if showRawData is True:
        maxSwarmSpan=0.25
    else:
        maxSwarmSpan=barWidth

    # Expand the ylim in both directions.
    ## Find half of the range of swarm_ylim.
    swarmrange=swarm_ylim[1] -swarm_ylim[0]
    pad=0.1*swarmrange
    x2=np.array([swarm_ylim[0]-pad, swarm_ylim[1]+pad])
    swarm_ylim=x2

    # plot params
    if axis_title_size is None:
        axis_title_size=25
    if yticksize is None:
        yticksize=18
    if xticksize is None:
        xticksize=18

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams={'labelsize' : axis_title_size}
    xtickParams={'labelsize' : xticksize}
    ytickParams={'labelsize' : yticksize}
    svgParams={'fonttype' : 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams) 

    if figsize is None:
        if len(tuple_in)>2:
            figsize=(12,(12/np.sqrt(2)))
        else:
            figsize=(8,(8/np.sqrt(2)))
    
    # Initialise figure, taking into account desired figsize.
    fig=plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain=gridspec.GridSpec( 
        1, np.shape(tuple_in)[0], 
         # 1 row; columns based on number of tuples in tuple.
         width_ratios=widthratio,
         wspace=0 )

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        plotdat=data[data[x].isin(current_tuple)]
        plotdat[x]=plotdat[x].astype("category")
        plotdat[x].cat.set_categories(
            current_tuple,
            ordered=True,
            inplace=True)
        plotdat.sort_values(by=[x])
        # Drop all nans. 
        plotdat=plotdat.dropna()

        # Calculate summaries.
        summaries=plotdat.groupby([x],sort=True)[y].apply(statfunction)

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot
            ax_raw=fig.add_subplot(gsMain[gsIdx],
                frame_on=False)
            ax_contrast=ax_raw.twinx()
        else:
        # Create subGridSpec with 2 rows and 1 column.
            subGridSpec=gridspec.GridSpecFromSubplotSpec(2, 1,
                subplot_spec=gsMain[gsIdx],
                wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw=plt.Subplot(fig,
                subGridSpec[0, 0],
                frame_on=False)
            ax_contrast=plt.Subplot(fig,
                subGridSpec[1, 0],
                sharex=ax_raw,
                frame_on=False)
        # Calculate the boostrapped contrast
        bscontrast=list()
        for i in range (1, len(current_tuple)):
        # Note that you start from one. No need to do auto-contrast!
            tempbs=bootstrap_contrast(
                data=data,
                x=x,
                y=y,
                idx=[current_tuple[0], current_tuple[i]],
                statfunction=statfunction,
                smoothboot=smoothboot,
                reps=reps)
            bscontrast.append(tempbs)
            contrastList.append(tempbs)
            contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0])
        
        #### PLOT RAW DATA.
        if showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            ax_raw.set_ylim(swarm_ylim)
            sw=sns.swarmplot(
                data=plotdat, 
                x=x, y=y, 
                order=current_tuple, 
                ax=ax_raw, 
                alpha=alpha, 
                palette=plotPal,
                size=rawMarkerSize,
                marker=rawMarkerType,
                **kwargs)

        if summaryBar is True:
            bar_raw=sns.barplot(
                x=summaries.index.tolist(),
                y=summaries.values,
                facecolor=summaryBarColor,
                ax=ax_raw,
                alpha=summaryBarAlpha)
        
        if floatContrast:
            # Get horizontal offset values.
            maxXBefore=max(sw.collections[0].get_offsets().T[0])
            minXAfter=min(sw.collections[1].get_offsets().T[0])
            xposAfter=maxXBefore+floatSwarmSpacer
            xAfterShift=minXAfter-xposAfter
            # shift the swarmplots
            offsetSwarmX(sw.collections[1], -xAfterShift)

            ## get swarm with largest span, set as max width of each barplot.
            for i, bar in enumerate(bar_raw.patches):
                x_width=bar.get_x()
                width=bar.get_width()
                centre=x_width + (width/2.)
                if i == 0:
                    bar.set_x(centre-maxSwarmSpan/2.)
                else:
                    bar.set_x(centre-xAfterShift-maxSwarmSpan/2.)
                bar.set_width(maxSwarmSpan)

            ## Set the ticks locations for ax_raw.
            ax_raw.xaxis.set_ticks((0, xposAfter))
            firstTick=ax_raw.xaxis.get_ticklabels()[0].get_text()
            secondTick=ax_raw.xaxis.get_ticklabels()[1].get_text()
            ax_raw.set_xticklabels([firstTick,#+' n='+count[firstTick],
                                     secondTick],#+' n='+count[secondTick]],
                                   rotation=tickAngle,
                                   horizontalalignment=tickAlignment)

        if summaryLine is True:
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i -summaryLineWidth, 
                    i + summaryLineWidth), # x-coordinates
                    (m, m),
                    color=summaryColour, 
                    linestyle=summaryLineStyle)

        if show95CI is True:
                sns.barplot(
                    data=plotdat, 
                    x=x, y=y, 
                    ax=ax_raw, 
                    alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple)==2:
            # Plot the CIs on the contrast axes.
            plotbootstrap(sw.collections[1],
                          bslist=tempbs,
                          ax=ax_contrast, 
                          violinWidth=violinWidth,
                          violinOffset=violinOffset,
                          markersize=summaryMarkerSize,
                          marker=summaryMarkerType,
                          offset=floatContrast,
                          color=violinColor,
                          linewidth=1)
            if floatContrast:
                # Set reference lines
                ## First get leftmost limit of left reference group
                xtemp, _=np.array(sw.collections[0].get_offsets()).T
                leftxlim=xtemp.min()
                ## Then get leftmost limit of right test group
                xtemp, _=np.array(sw.collections[1].get_offsets()).T
                rightxlim=xtemp.min()

                ## zero line
                ax_contrast.hlines(0,                   # y-coordinates
                                leftxlim, 3.5,       # x-coordinates, start and end.
                                linestyle=contrastZeroLineStyle,
                                linewidth=0.75,
                                color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(tempbs['summary'], 
                                rightxlim, 3.5,        # x-coordinates, start and end.
                                linestyle=contrastEffectSizeLineStyle,
                                linewidth=0.75,
                                color=contrastEffectSizeLineColor)

                
                ## If the effect size is positive, shift the right axis up.
                if float(tempbs['summary'])>0:
                    rightmin=ax_raw.get_ylim()[0] -float(tempbs['summary'])
                    rightmax=ax_raw.get_ylim()[1] -float(tempbs['summary'])
                ## If the effect size is negative, shift the right axis down.
                elif float(tempbs['summary'])<0:
                    rightmin=ax_raw.get_ylim()[0] + float(tempbs['summary'])
                    rightmax=ax_raw.get_ylim()[1] + float(tempbs['summary'])

                ax_contrast.set_ylim(rightmin, rightmax)

                    
                if gsIdx>0:
                    ax_contrast.set_ylabel('')

                align_yaxis(ax_raw, tempbs['statistic_ref'], ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)
                
                # Set xlims so everything is properly visible!
                swarm_xbounds=ax_raw.get_xbound()
                ax_contrast.set_xbound(swarm_xbounds[0] -(summaryLineWidth * 1.1), 
                    swarm_xbounds[1] + (summaryLineWidth * 1.1))

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(
                bslist=bscontrast,
                ax=ax_contrast,
                violinWidth=violinWidth,
                violinOffset=violinOffset,
                markersize=summaryMarkerSize,
                marker=summaryMarkerType,
                linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx>0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList=pd.DataFrame(contrastList).T
    contrastList.columns=contrastListNames
    
    ########
    axesCount=len(fig.get_axes())

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx=fig.axes[i]

        if i!=axesCount-2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(showAllYAxes)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(Line2D(
                (axx.xaxis.get_view_interval()[0], 
                    axx.xaxis.get_view_interval()[1]), 
                (0,0),
                color='black', linewidth=0.75
                )
            )

        # I don't know why the swarm axes controls the contrast axes ticks....
        if showGroupCount:
            count=data.groupby(x).count()[y]
            newticks=list()
            for ix, t in enumerate(axx.xaxis.get_ticklabels()):
                t_text=t.get_text()
                nt=t_text+' n='+str(count[t_text])
                newticks.append(nt)
            axx.xaxis.set_ticklabels(newticks)

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i==axesCount-2: # the last (rightmost) swarm axes.
                axx.legend(loc='top right',
                    bbox_to_anchor=(1.1,1.0),
                    fontsize=legendFontSize,
                    **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j,i in enumerate(range(1, axesCount, 2)):
        axx=fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright=axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                xmin=xleft-1, 
                xmax=xright+1,
                linestyle=contrastZeroLineStyle,
                linewidth=0.75,
                color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)
            # # Draw back x-axis lines connecting ticks.
            # drawback_x(axx)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes.
                    # Not entirely sure why I have to do this.
                    drawback_y(axx)

            sns.despine(ax=axx, 
                top=True, right=True, 
                left=False, bottom=False, 
                trim=True)

            # Rotate tick labels.
            rotateTicks(axx,tickAngle,tickAlignment)

        else:
            # Re-draw the floating axis to the correct limits.
            lower=np.min(contrastList.ix['diffarray',j])
            upper=np.max(contrastList.ix['diffarray',j])
            meandiff=contrastList.ix['summary', j]

            ## Make sure we have zero in the limits.
            if lower>0:
                lower=0.
            if upper<0:
                upper=0.

            ## Get the tick interval from the left y-axis.
            leftticks=fig.get_axes()[i-1].get_yticks()
            tickstep=leftticks[1] -leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1=axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2=list()
            for a,b in enumerate(newticks1):
                if (b >= lower and b <= upper):
                    # if the tick lies within upper and lower, take it.
                    newticks2.append(b)
            # if the meandiff falls outside of the newticks2 set, add a tick in the right direction.
            if np.max(newticks2)<meandiff:
                ind=np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1.
                newticks2.append( newticks1[ind+1] )
            elif meandiff<np.min(newticks2):
                ind=np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1.
                newticks2.append( newticks1[ind-1] )
            newticks2=np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))
            
            ## Despine the axes.
            sns.despine(ax=axx, trim=True, 
                bottom=False, right=False,
                left=True, top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots.
    if (axesCount>2 and 
        contrastShareY is True and 
        floatContrast is False):

        # Set contrast ylim as max ticks of leftmost swarm axes.
        if contrastYlim is None:
            lower=list()
            upper=list()
            for c in range(0,len(contrastList.columns)):
                lower.append( np.min(contrastList.ix['bca_ci_low',c]) )
                upper.append( np.max(contrastList.ix['bca_ci_high',c]) )
            lower=np.min(lower)
            upper=np.max(upper)
        else:
            lower=contrastYlim[0]
            upper=contrastYlim[1]

        normalizeContrastY(fig, 
            contrast_ylim = contrastYlim, 
            show_all_yaxes = showAllYAxes)

    # if (axesCount==2 and 
    #     floatContrast is False):
    #     drawback_x(fig.get_axes()[1])
    #     drawback_y(fig.get_axes()[1])

    # if swarmShareY is False:
    #     for i in range(0, axesCount, 2):
    #         drawback_y(fig.get_axes()[i])
                       
    # if contrastShareY is False:
    #     for i in range(1, axesCount, 2):
    #         if floatContrast is True:
    #             sns.despine(ax=fig.get_axes()[i], 
    #                        top=True, right=False, left=True, bottom=True, 
    #                        trim=True)
    #         else:
    #             sns.despine(ax=fig.get_axes()[i], trim=True)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)

    else:    
        # Tight Layout!
        gsMain.tight_layout(fig)
    
    # And we're all done.
    rcdefaults() # restore matplotlib defaults.
    sns.set() # restore seaborn defaults.
    return fig, contrastList