def do_surveys():
    """Draw the four questionnaire figures (NASA-TLX and survey results).

    Reads the module-level ``tlx`` and ``surveys`` tables and draws into the
    contexts opened by ``figure(...)``:

    * ``tlx_results``       -- box plot plus swarm of the weighted TLX score
    * ``tlx_components``    -- bar plot of each TLX component per experiment
    * ``survey_results``    -- box plot plus swarm of the total survey score
    * ``survey_components`` -- horizontal bars plus strip plot per question

    NOTE(review): ``figure`` is defined elsewhere; presumably it is a context
    manager that creates and saves the named figure -- confirm.
    """
    id_columns = ["user", "experiment", "order"]
    narrow = fig_size(0.44, 1)

    def box_with_swarm(frame, column, axis_label):
        # Box plot per experiment with the individual observations on top;
        # the y-axis is forced to start at zero while keeping its upper end.
        sns.factorplot(x="experiment", y=column, data=frame, kind="box")
        sns.swarmplot(
            x="experiment",
            y=column,
            data=frame,
            palette=cmap_complement,
            split=True,
        )
        _, upper = plt.ylim()
        plt.ylim(0, upper)
        plt.ylabel(axis_label)

    with figure("tlx_results", figsize=narrow):
        box_with_swarm(tlx, "tlx", "NASA-TLX weighted score")

    with figure("tlx_components", figsize=fig_size(0.44, 1)):
        tlx_long = pd.melt(
            tlx,
            id_vars=id_columns,
            value_vars=[
                "mental",
                "physical",
                "temporal",
                "performance",
                "effort",
                "frustration",
            ],
            var_name="component",
            value_name="score",
        )
        sns.barplot(x="component", y="score", hue="experiment", data=tlx_long)
        axes = plt.gca()
        axes.set_xticklabels(["MD", "PD", "TD", "P", "E", "F"])
        plt.xlabel("NASA-TLX component")
        plt.ylabel("score")

    # NOTE(review): the size is passed positionally here, whereas every other
    # figure in this function passes it as ``figsize=`` -- confirm intended.
    with figure("survey_results", fig_size(0.44, 1)):
        box_with_swarm(surveys, "total", "survey score")

    with figure("survey_components", figsize=fig_size(0.9, 0.5)):
        questions = [
            "orientation_understanding",
            "orientation_control",
            "position_understanding",
            "position_control",
            "spacial_understanding",
            "spacial_control",
        ]
        survey_long = pd.melt(
            surveys,
            id_vars=id_columns,
            value_vars=questions,
            var_name="question",
            value_name="rating",
        )
        bars = sns.barplot(
            x="rating", y="question", hue="experiment", data=survey_long
        )
        sns.stripplot(
            x="rating",
            y="question",
            data=survey_long,
            hue="experiment",
            split=True,
            palette=cmap_complement,
            jitter=0.6,
            size=3,
        )
        plt.gca().set_yticklabels([
            "angle aware",
            "angle control",
            "position aware",
            "position control",
            "rel. pos. aware",
            "rel. pos. control",
        ])
        # Both layers use ``hue``; the first two legend entries are dropped so
        # each experiment is listed only once.
        legend_handles, legend_labels = bars.get_legend_handles_labels()
        plt.legend(legend_handles[2:], legend_labels[2:])
        plt.xlabel("rating")
        plt.title("Survey results")
def do_durations():
    """Draw the two run-duration figures from the module-level ``analyses``.

    * ``duration``      -- box plot plus swarm of duration per experiment
    * ``duration_runs`` -- point plot of duration against run order, one
      line per experiment

    Both figures have their y-axis forced to start at zero.
    """
    duration_label = "duration (s)"

    def start_y_axis_at_zero():
        # Keep the automatically chosen upper limit, pin the lower one to 0.
        _, upper = plt.ylim()
        plt.ylim(0, upper)

    with figure("duration", figsize=fig_size(0.44, 1)):
        per_experiment = {"x": "experiment", "y": "duration", "data": analyses}
        sns.factorplot(kind="box", **per_experiment)
        sns.swarmplot(split=True, palette=cmap_complement, **per_experiment)
        start_y_axis_at_zero()
        plt.ylabel(duration_label)

    with figure("duration_runs", figsize=fig_size(0.44, 1)):
        sns.factorplot(
            x="order",
            y="duration",
            hue="experiment",
            data=analyses,
            capsize=0.2,
        )
        start_y_axis_at_zero()
        plt.ylabel(duration_label)
        plt.xlabel("run")
def registration_qc(
    df,
    anova_type=3,
    cmap="Set3",
    extra=False,
    extra_cmap=EXTRA_COLORSET,
    group={"sub": "Subject"},
    model="{value} ~ C({extra}) + C({group}) + C({repeat}) -1",
    print_model=False,
    print_anova=False,
    repeat={"ses": "Session"},
    samri_style=True,
    save_as=False,
    show=True,
    value={"similarity": "Similarity"},
    values_rename={},
):
    """Aggregate plot of similarity metrics for registration quality control.

    Fits an OLS model to the similarity scores, runs an ANOVA on it, and draws
    a swarm plot of the scores grouped by `group` and coloured by `repeat`.

    Parameters
    ----------
    df : pandas.DataFrame or str
        Pandas DataFrame or path to a CSV file containing similarity scores.
        A DataFrame passed in is not modified.
    anova_type : int, optional
        Type of the ANOVA to use for model analysis. Consult [1]_ for a
        theoretical overview, and `statsmodels.stats.anova.anova_lm` for the
        implementation we use.
    cmap : str or list, optional
        If a string, the variable specifies the matplotlib colormap [2]_
        (qualitative colormaps are recommended) to use for `repeat`
        highlighting. If a list, the variable should be a list of colors
        (e.g. `["#00FF00","#2222FF"]`).
    extra : str or dict or False, optional
        Column of `df` to use as an additional factor. If set, a second swarm
        plot with larger markers, coloured by this factor according to
        `extra_cmap`, is drawn underneath the `repeat`-coloured markers. If a
        dictionary is passed, the column named for the key of the dictionary
        is renamed to the value, and the value name is then used as the extra
        factor. The value is also substituted for `{extra}` in `model`.
    extra_cmap : str or list, optional
        If a string, the variable specifies the matplotlib colormap [2]_
        (qualitative colormaps are recommended) to use for `extra`
        highlighting, which is applied as a contour to the `repeat`-colored
        patches. If a list, the variable should be a list of colors
        (e.g. `["#00FF00","#2222FF"]`).
    group : str or dict, optional
        Column of `df` to use as the group factor (values of this factor will
        represent the x-axis). If a dictionary is passed, the column named for
        the key of the dictionary is renamed to the value, and the value name
        is then used as the group factor. This is useful for the input of
        longer but clearer names for plotting.
    model : str, optional
        A string specifying the ANOVA formula as a statsmodels formula [3]_.
        It may contain the string substitutions `{value}`, `{group}`,
        `{repeat}` and `{extra}` (e.g. `"{value} ~ C({group})"`).
    print_model : bool, optional
        Whether to print the model output table.
    print_anova : bool, optional
        Whether to print the ANOVA output table.
    repeat : str or dict, optional
        Column of `df` to use as the repeat factor (values of this factor will
        be represented via different hues, according to `cmap`). If a
        dictionary is passed, the column named for the key of the dictionary
        is renamed to the value, and the value name is then used as the repeat
        factor. This is useful for the input of longer but clearer names for
        plotting.
    samri_style : bool, optional
        Whether to apply a generic SAMRI style to the plot.
    save_as : str, optional
        Path under which to save the generated plot (format is interpreted
        from provided extension).
    show : bool, optional
        Whether to show the plot in an interactive window.
    value : str or dict, optional
        Column of `df` to use as the value (this variable will be represented
        on the y-axis). If a dictionary is passed, the column named for the
        key of the dictionary is renamed to the value, and the value name is
        then used as the value column. This is useful for the input of longer
        but clearer names for plotting.
    values_rename : dict, optional
        Dictionary used to rename values in `df`. This is useful for the input
        of longer but clearer names for plotting (this parameter will not
        rename column names, for renaming those, see parameters `extra`,
        `group`, `repeat`, and `value`).

    Returns
    -------
    pandas.DataFrame
        ANOVA summary table in DataFrame format.

    References
    ----------
    .. [1] http://goanna.cs.rmit.edu.au/~fscholer/anova.php

    .. [2] https://matplotlib.org/examples/color/colormaps_reference.html

    .. [3] http://www.statsmodels.org/dev/example_formulas.html
    """
    import seaborn.apionly as sns
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    if samri_style:
        this_path = path.dirname(path.realpath(__file__))
        plt.style.use(path.join(this_path, "samri.conf"))

    # `basestring` only exists on Python 2; fall back to `str` on Python 3.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str
    if isinstance(df, string_types):
        df = pd.read_csv(path.abspath(path.expanduser(df)))

    # Replace values one key at a time (same order as the dictionary), but on
    # a copy, so that a DataFrame handed in by the caller is left untouched.
    for old_value, new_value in values_rename.items():
        df = df.replace(to_replace=old_value, value=new_value)

    # A dict argument means "rename column <key> to <value> and use <value>".
    column_renames = {}
    if isinstance(value, dict):
        column_renames.update(value)
        value = list(value.values())[0]
    if isinstance(group, dict):
        column_renames.update(group)
        group = list(group.values())[0]
    if isinstance(repeat, dict):
        column_renames.update(repeat)
        repeat = list(repeat.values())[0]
    if isinstance(extra, dict):
        column_renames.update(extra)
        extra = list(extra.values())[0]
    df = df.rename(columns=column_renames)

    # NOTE(review): with the default `extra=False` and the default `model`,
    # the formula contains the term `C(False)` -- confirm this is intended.
    model = model.format(value=value, group=group, repeat=repeat, extra=extra)
    regression_model = smf.ols(model, data=df).fit()
    if print_model:
        print(regression_model.summary())

    anova_summary = sm.stats.anova_lm(regression_model, typ=anova_type)
    if print_anova:
        print(anova_summary)

    if extra:
        # Larger markers drawn first, so they show as a contour around the
        # `repeat`-coloured markers drawn on top of them.
        sns.swarmplot(
            x=group,
            y=value,
            hue=extra,
            data=df,
            size=rcParams["lines.markersize"] * 1.4,
            palette=sns.color_palette(extra_cmap),
        )
    sns.swarmplot(
        x=group,
        y=value,
        hue=repeat,
        data=df,
        edgecolor=(1, 1, 1, 0.0),
        linewidth=rcParams["lines.markersize"] * .4,
        palette=sns.color_palette(cmap),
    )

    plt.legend(loc=rcParams["legend.loc"])

    # Save before showing: a blocking `show()` may release the figure when the
    # window is closed, after which `savefig` would write an empty canvas.
    if save_as:
        plt.savefig(path.abspath(path.expanduser(save_as)), bbox_inches='tight')
    if show:
        # Use pyplot directly; the `sns.plt` alias is not available in newer
        # seaborn releases.
        plt.show()

    return anova_summary
def _pairedcontrast_normalize_idx(data, x, idx):
    """Return `idx` as a sequence of (before, after) pairs, or raise."""
    if idx is None:
        # Default: the first two levels of `x` in sorted order, wrapped so
        # that the result is ONE pair rather than two separate labels.
        levels = tuple(np.unique(data[x])[0:2])
        if len(levels) != 2:
            raise ValueError(
                "Column {!r} needs at least 2 distinct levels.".format(x))
        return (levels, )

    if all(isinstance(element, str) for element in idx):
        # A single pair of group names.
        if len(idx) != 2:
            raise ValueError("{} does not have length 2.".format(idx))
        return (tuple(idx), )

    if all(isinstance(element, (tuple, list)) for element in idx):
        # Several pairs: one panel per pair.
        for element in idx:
            if len(element) != 2:
                raise ValueError("{} does not have length 2.".format(element))
        return idx

    raise TypeError(
        "`idx` must be either a pair of group names, or a sequence of such "
        "pairs; got {!r}.".format(idx))


def _pairedcontrast_ticks(candidate_ticks, lower, upper, meandiff):
    """Pick the ticks within [lower, upper], extended to cover `meandiff`."""
    kept = [tick for tick in candidate_ticks if lower <= tick <= upper]
    # If the mean difference falls outside the kept ticks, add the next
    # candidate tick in that direction.
    if np.max(kept) < meandiff:
        ind = np.where(candidate_ticks == np.max(kept))[0][0]
        kept.append(candidate_ticks[ind + 1])
    elif meandiff < np.min(kept):
        ind = np.where(candidate_ticks == np.min(kept))[0][0]
        kept.append(candidate_ticks[ind - 1])
    kept = np.array(kept)
    kept.sort()
    return kept


def pairedcontrast(data, x, y, idcol, reps=3000, statfunction=None, idx=None,
                   figsize=None, beforeAfterSpacer=0.01, violinWidth=0.005,
                   floatOffset=0.05, showRawData=False, showAllYAxes=False,
                   floatContrast=True, smoothboot=False,
                   floatViolinOffset=None, showConnections=True,
                   summaryBar=False, contrastYlim=None, swarmYlim=None,
                   barWidth=0.005, rawMarkerSize=8, rawMarkerType='o',
                   summaryMarkerSize=10, summaryMarkerType='o',
                   summaryBarColor='grey', meansSummaryLineStyle='solid',
                   contrastZeroLineStyle='solid',
                   contrastEffectSizeLineStyle='solid',
                   contrastZeroLineColor='black',
                   contrastEffectSizeLineColor='black', pal=None, legendLoc=2,
                   legendFontSize=12, legendMarkerScale=1,
                   axis_title_size=None, yticksize=None, xticksize=None,
                   tickAngle=45, tickAlignment='right', **kwargs):
    """Plot paired (before/after) data with a bootstrapped difference.

    For every (before, after) pair in `idx` one panel is drawn: the raw
    observations of both groups (optionally joined per `idcol`), and the
    bootstrapped distribution of the paired differences with its summary
    value and confidence interval. The matplotlib rc label sizes are changed
    while plotting; `rcdefaults()` and `sns.set()` are called before
    returning.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format data. Rows containing NaN are dropped first.
    x, y : str
        Column holding the group labels, and column holding the values.
    idcol : str
        Column identifying the paired observations.
    reps, smoothboot
        Passed through to `bootstrap`.
    statfunction : callable, optional
        Summary statistic; defaults to `np.mean`.
    idx : sequence, optional
        Either one pair of group names, or a sequence of such pairs (one
        panel each). Defaults to the first two sorted levels of `data[x]`.
    figsize : tuple, optional
        Defaults to (6, 6), or (12, 12 / sqrt(2)) for more than two panels.
    floatContrast : bool, optional
        If True the difference is drawn on a twin y-axis next to the raw
        data; otherwise on a separate axes below it.
    showRawData, showConnections, summaryBar : bool, optional
        Toggle the visible raw markers, the before-after connecting lines,
        and the bars at the group means.
    contrastYlim, swarmYlim : sequence of two numbers, optional
        Y-limits for the contrast axes and the raw-data axes.
    contrastZeroLineStyle, contrastZeroLineColor,
    contrastEffectSizeLineStyle, contrastEffectSizeLineColor
        Appearance of the zero line and of the effect-size line.
    pal : dict, optional
        Palette; by default built from all levels of the hue (or `x`) column.
    **kwargs
        Forwarded to `sns.swarmplot` / `sns.stripplot`. `color` defaults to
        'k' when neither `color` nor `hue` is given.

    `floatOffset`, `meansSummaryLineStyle` and `summaryMarkerType` are
    accepted but not used by this function.

    Returns
    -------
    fig : matplotlib figure
    contrastList : pandas.DataFrame
        One column per pair, named "<after> v.s. <before>", holding the
        entries returned by `bootstrap` plus `ttest_pval`.

    Raises
    ------
    ValueError
        If a pair in `idx` does not have exactly two elements.
    TypeError
        If `idx` mixes group names and pairs.
    """
    # Preliminaries.
    data = data.dropna()

    # Validate `idx` before any global matplotlib state is modified.
    idx = _pairedcontrast_normalize_idx(data, x, idx)

    # plot params
    if axis_title_size is None:
        axis_title_size = 15
    if yticksize is None:
        yticksize = 12
    if xticksize is None:
        xticksize = 12

    rc('axes', labelsize=axis_title_size)
    rc('xtick', labelsize=xticksize)
    rc('ytick', labelsize=yticksize)

    if floatViolinOffset is None:
        floatViolinOffset = beforeAfterSpacer / 2
    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])
    if swarmYlim is not None:
        swarmYlim = np.array([swarmYlim[0], swarmYlim[1]])

    ## The palette is defined on ALL levels of the hue (or `x`) column, so
    ## that the colour of each group stays the same when the same DataFrame
    ## is re-used across different plots.
    if 'hue' in kwargs:
        u = kwargs['hue']
    else:
        u = x
    if 'color' not in kwargs and 'hue' not in kwargs:
        kwargs['color'] = 'k'

    if pal is None:
        levels = data[u].unique()
        pal = dict(zip(levels, sns.color_palette(n_colors=len(levels))))

    # Initialise figure.
    if figsize is None:
        if len(idx) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (6, 6)
    fig = plt.figure(figsize=figsize)

    # 1 row; one column per pair in `idx`.
    gsMain = gridspec.GridSpec(1, len(idx))

    # Set default statfunction
    if statfunction is None:
        statfunction = np.mean

    # Collect all the contrasts generated.
    contrastList = list()
    contrastListNames = list()

    ## Pivot to get one row per `idcol`, one column per group.
    data_pivot = data.pivot_table(index=idcol, columns=x, values=y)

    for gsIdx, xlevs in enumerate(idx):
        befX = str(xlevs[0] + '_x')
        aftX = str(xlevs[1] + '_x')

        if floatContrast is True:
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            gsSubGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx])
            ax_raw = plt.Subplot(fig, gsSubGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig, gsSubGridSpec[1, 0],
                                      sharex=ax_raw, frame_on=False)

        ## Plot raw data as swarmplot or stripplot.
        if showRawData is True:
            swarm_raw = sns.swarmplot(data=data, x=x, y=y, order=xlevs,
                                      ax=ax_raw, palette=pal,
                                      size=rawMarkerSize,
                                      marker=rawMarkerType, **kwargs)
        else:
            swarm_raw = sns.stripplot(data=data, x=x, y=y, order=xlevs,
                                      ax=ax_raw, palette=pal, **kwargs)
        swarm_raw.set_ylim(swarmYlim)

        before_pts = swarm_raw.collections[0]
        after_pts = swarm_raw.collections[1]

        ## Get some details about the raw data.
        maxXBefore = max(before_pts.get_offsets().T[0])
        minXAfter = min(after_pts.get_offsets().T[0])
        if showRawData is True:
            beforeAfterSpacer = 1
        xposAfter = maxXBefore + beforeAfterSpacer
        xAfterShift = minXAfter - xposAfter

        ## Shift the after swarmpoints closer for aesthetic purposes.
        offsetSwarmX(after_pts, -xAfterShift)

        ## pandas DataFrame of 'before' group
        x1 = pd.DataFrame({
            befX: pd.Series(before_pts.get_offsets().T[0]),
            xlevs[0]: pd.Series(before_pts.get_offsets().T[1]),
            '_R_': pd.Series(before_pts.get_facecolors().T[0]),
            '_G_': pd.Series(before_pts.get_facecolors().T[1]),
            '_B_': pd.Series(before_pts.get_facecolors().T[2]),
        })
        ## Join the RGB columns into a tuple, then assign to a column.
        x1['_hue_'] = x1[['_R_', '_G_', '_B_']].apply(tuple, axis=1)
        # Match plotted points to ids by sorting both on the plotted value.
        x1 = x1.sort_values(by=xlevs[0])
        x1.index = data_pivot.sort_values(by=xlevs[0]).index

        ## pandas DataFrame of 'after' group
        x2 = pd.DataFrame({
            aftX: pd.Series(after_pts.get_offsets().T[0]),
            xlevs[1]: pd.Series(after_pts.get_offsets().T[1]),
        })
        x2 = x2.sort_values(by=xlevs[1])
        x2.index = data_pivot.sort_values(by=xlevs[1]).index

        ## Join x1 and x2, on both their indexes.
        plotPoints = x1.merge(x2, left_index=True, right_index=True,
                              how='outer')

        ## Add the hue column if hue argument was passed.
        if 'hue' in kwargs:
            h = kwargs['hue']
            plotPoints[h] = data.pivot(index=idcol, columns=x,
                                       values=h)[xlevs[0]]
            swarm_raw.legend(loc=legendLoc, fontsize=legendFontSize,
                             markerscale=legendMarkerScale)

        ## Join each 'before' point to its respective 'after' point.
        if showConnections is True:
            for i in plotPoints.index:
                ax_raw.plot(
                    [plotPoints.loc[i, befX], plotPoints.loc[i, aftX]],
                    [plotPoints.loc[i, xlevs[0]],
                     plotPoints.loc[i, xlevs[1]]],
                    linestyle='solid',
                    color=plotPoints.loc[i, '_hue_'],
                    linewidth=0.75,
                    alpha=0.75)

        ## Hide the raw swarmplot data if so desired.
        if showRawData is False:
            before_pts.set_visible(False)
            after_pts.set_visible(False)

        if showRawData is True:
            maxSwarmSpan = 0.5
        else:
            maxSwarmSpan = barWidth

        ## Plot Summary Bar.
        if summaryBar is True:
            # Select `y` before aggregating so that non-numeric columns do
            # not take part in the mean.
            means = data.groupby(x, sort=True)[y].mean()

            ## Draw summary bar.
            bar_raw = sns.barplot(x=means.index, y=means.values,
                                  order=xlevs, ax=ax_raw, ci=0,
                                  facecolor=summaryBarColor, alpha=0.25)
            ## Draw zero reference line.
            ax_raw.add_artist(
                Line2D((ax_raw.xaxis.get_view_interval()[0],
                        ax_raw.xaxis.get_view_interval()[1]),
                       (0, 0), color='black', linewidth=0.75))

            ## Give each bar the width `maxSwarmSpan`, and move the second
            ## bar along with the shifted 'after' points.
            for i, bar in enumerate(bar_raw.patches):
                centre = bar.get_x() + bar.get_width() / 2.
                if i == 0:
                    bar.set_x(centre - maxSwarmSpan / 2.)
                else:
                    bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                bar.set_width(maxSwarmSpan)

        # Extents of the plotted points.
        beforeRaw = pd.DataFrame(before_pts.get_offsets())
        afterRaw = pd.DataFrame(after_pts.get_offsets())
        before_leftx = min(beforeRaw[0])
        after_rightx = max(afterRaw[0])
        # Summary of the 'before' group: the level that the zero of the
        # contrast axis is aligned with.
        before_stat_summary = statfunction(beforeRaw[1])

        # Calculate the summary difference and CI.
        plotPoints['delta_y'] = plotPoints[xlevs[1]] - plotPoints[xlevs[0]]
        plotPoints['delta_x'] = [0] * len(plotPoints)

        bootsDelta = bootstrap(plotPoints['delta_y'],
                               statfunction=statfunction,
                               smoothboot=smoothboot, reps=reps)
        summDelta = bootsDelta['summary']
        lowDelta = bootsDelta['bca_ci_low']
        highDelta = bootsDelta['bca_ci_high']

        # Set new xpos for delta violin.
        if floatContrast is True:
            if showRawData is False:
                xposPlusViolin = deltaSwarmX = after_rightx + floatViolinOffset
            else:
                xposPlusViolin = deltaSwarmX = after_rightx + maxSwarmSpan
        else:
            xposPlusViolin = xposAfter
        if showRawData is True:
            # With raw data shown, the violin is as wide as the swarm span.
            violinWidth = maxSwarmSpan

        xmaxPlot = xposPlusViolin + violinWidth

        # Plot the summary measure.
        ax_contrast.plot(xposPlusViolin, summDelta, marker='o',
                         markerfacecolor='k', markersize=summaryMarkerSize,
                         alpha=0.75)
        # Plot the CI.
        ax_contrast.plot([xposPlusViolin, xposPlusViolin],
                         [lowDelta, highDelta], color='k', alpha=0.75,
                         linestyle='solid')
        # Plot the violin-plot.
        v = ax_contrast.violinplot(bootsDelta['stat_array'],
                                   [xposPlusViolin], widths=violinWidth,
                                   showextrema=False, showmeans=False)
        halfviolin(v, half='right', color='k')

        # Remove left axes x-axis title and floating axes y-axis title.
        ax_raw.set_xlabel("")
        ax_contrast.set_ylabel("")

        # Set proper x-limits
        ax_raw.set_xlim(before_leftx - beforeAfterSpacer / 2, xmaxPlot)
        ax_raw.get_xaxis().set_view_interval(
            before_leftx - beforeAfterSpacer / 2,
            after_rightx + beforeAfterSpacer / 2)
        ax_contrast.set_xlim(ax_raw.get_xlim())

        if floatContrast is True:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            # Make sure they have the same y-limits.
            ax_contrast.set_ylim(ax_raw.get_ylim())

            ## Set the tick labels!
            ax_raw.set_xticklabels(xlevs, rotation=tickAngle,
                                   horizontalalignment=tickAlignment)

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, statfunction(plotPoints[xlevs[0]]),
                        ax_contrast, 0)

            # Add label to floating axes. But on ax_raw!
            ax_raw.text(x=deltaSwarmX,
                        y=ax_raw.get_yaxis().get_view_interval()[0],
                        horizontalalignment='left', s='Difference',
                        fontsize=15)

            # Set reference lines
            ## zero line
            ax_contrast.hlines(
                0,
                ax_contrast.xaxis.get_majorticklocs()[0],
                ax_raw.xaxis.get_view_interval()[1],
                linestyle=contrastZeroLineStyle,
                linewidth=0.75,
                color=contrastZeroLineColor)
            ## effect size line
            ax_contrast.hlines(
                summDelta,
                ax_contrast.xaxis.get_majorticklocs()[1],
                ax_raw.xaxis.get_view_interval()[1],
                linestyle=contrastEffectSizeLineStyle,
                linewidth=0.75,
                color=contrastEffectSizeLineColor)

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, before_stat_summary, ax_contrast, 0.)
        else:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            fig.add_subplot(ax_raw)
            fig.add_subplot(ax_contrast)
        ax_contrast.set_ylim(contrastYlim)

        # 1-sample t-test to see if the mean of the difference is different
        # from 0.
        ttestresult = ttest_1samp(plotPoints['delta_y'], popmean=0)[1]
        bootsDelta['ttest_pval'] = ttestresult
        contrastList.append(bootsDelta)
        contrastListNames.append(str(xlevs[1]) + ' v.s. ' + str(xlevs[0]))

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Axes alternate raw, contrast, raw, contrast, ...
    all_axes = fig.get_axes()

    # Now we iterate thru the contrast axes to normalize all the ylims.
    for j, i in enumerate(range(1, len(all_axes), 2)):
        axx = all_axes[i]
        # Rows are looked up by label, the column by position.
        stat_array = contrastList.loc['stat_array'].iloc[j]
        meandiff = contrastList.loc['summary'].iloc[j]

        ## Get max and min of the dataset; make sure zero is in the limits.
        lower = min(np.min(stat_array), 0.)
        upper = max(np.max(stat_array), 0.)

        ## The tick distance of the raw axes is re-used for the contrast axes.
        rawAxesTicks = all_axes[i - 1].yaxis.get_majorticklocs()
        rawAxesTickDist = rawAxesTicks[1] - rawAxesTicks[0]
        ## First re-draw of axis with new tick interval
        axx.yaxis.set_major_locator(MultipleLocator(rawAxesTickDist))
        newticks1 = axx.get_yticks()

        if floatContrast is False:
            if showAllYAxes is False and i in range(2, len(all_axes)):
                axx.get_yaxis().set_visible(showAllYAxes)
            else:
                newticks2 = _pairedcontrast_ticks(newticks1, lower, upper,
                                                  meandiff)
                axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Draw zero reference line.
            axx.hlines(y=0,
                       xmin=axx.get_xaxis().get_view_interval()[0],
                       xmax=axx.get_xaxis().get_view_interval()[1],
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)

            sns.despine(ax=axx, trim=True, bottom=False, right=True,
                        left=False, top=True)

            ## Draw back the lines for the relevant y- and x-axes.
            drawback_y(axx)
            drawback_x(axx)

        elif floatContrast is True:
            newticks2 = _pairedcontrast_ticks(newticks1, lower, upper,
                                              meandiff)
            ## Re-draw the axis.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine and trim the axes.
            sns.despine(ax=axx, trim=True, bottom=False, right=False,
                        left=True, top=True)

    # Loop through the raw data swarmplots and despine them appropriately.
    for i in range(0, len(all_axes), 2):
        raw_axes = all_axes[i]
        if floatContrast is True:
            sns.despine(ax=raw_axes, trim=True, right=True)
        else:
            sns.despine(ax=raw_axes, trim=True, bottom=True, right=True)
            raw_axes.get_xaxis().set_visible(False)

        # Draw back the lines for the relevant y-axes.
        ymin = raw_axes.get_yaxis().get_majorticklocs()[0]
        ymax = raw_axes.get_yaxis().get_majorticklocs()[-1]
        x_left, _ = raw_axes.get_xaxis().get_view_interval()
        raw_axes.add_artist(
            Line2D((x_left, x_left), (ymin, ymax), color='black',
                   linewidth=1.5))

    # Zero gaps between plots on the same row, if floatContrast is False
    if floatContrast is False and showAllYAxes is False:
        gsMain.update(wspace=0)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.

    return fig, contrastList
def contrastplot_test(data, x, y, idx=None, alpha=0.75, axis_title_size=None, barWidth=5, contrastShareY=True, contrastEffectSizeLineStyle='solid', contrastEffectSizeLineColor='black', contrastYlim=None, contrastZeroLineStyle='solid', contrastZeroLineColor='black', effectSizeYLabel="Effect Size", figsize=None, floatContrast=True, floatSwarmSpacer=0.2, heightRatio=(1, 1), idcol=None, lineWidth=2, legend=True, legendFontSize=14, legendFontProps={}, paired=False, pal=None, rawMarkerSize=8, rawMarkerType='o', reps=3000, showGroupCount=True, show95CI=False, showAllYAxes=False, showRawData=True, smoothboot=False, statfunction=None, summaryBar=False, summaryBarColor='grey', summaryBarAlpha=0.25, summaryColour='black', summaryLine=True, summaryLineStyle='solid', summaryLineWidth=0.25, summaryMarkerSize=10, summaryMarkerType='o', swarmShareY=True, swarmYlim=None, tickAngle=45, tickAlignment='right', violinOffset=0.375, violinWidth=0.2, violinColor='k', xticksize=None, yticksize=None, **kwargs): '''Takes a pandas dataframe and produces a contrast plot: either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot. ----------------------------------------------------------------------- Description of flags upcoming.''' # Check that `data` is a pandas dataframe if 'DataFrame' not in str(type(data)): raise TypeError( "The object passed to the command is not not a pandas DataFrame.\ Please convert it to a pandas DataFrame.") # Get and set levels of data[x] if idx is None: widthratio = [1] allgrps = np.sort(data[x].unique()) if paired: # If `idx` is not specified, just take the FIRST TWO levels alphabetically. tuple_in = tuple(allgrps[0:2], ) else: # No idx is given, so all groups are compared to the first one in the DataFrame column. 
tuple_in = (tuple(allgrps), ) if len(allgrps) > 2: floatContrast = False else: if all(isinstance(element, str) for element in idx): # if idx is supplied but not a multiplot (ie single list or tuple) tuple_in = (idx, ) widthratio = [1] if len(idx) > 2: floatContrast = False elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! tuple_in = idx if (any(len(element) > 2 for element in tuple_in)): # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False. floatContrast = False # Make sure the widthratio of the seperate multiplot corresponds to how # many groups there are in each one. widthratio = [] for i in tuple_in: widthratio.append(len(i)) else: raise TypeError( "The object passed to `idx` consists of a mixture of single strings and tuples. \ Please make sure that `idx` is either a tuple of column names, or a tuple of tuples for plotting." ) # initialise statfunction if statfunction == None: statfunction = np.mean # Create list to collect all the contrast DataFrames generated. contrastList = list() contrastListNames = list() # # Calculate the bootstraps according to idx. # for ix, current_tuple in enumerate(tuple_in): # bscontrast=list() # for i in range (1, len(current_tuple)): # # Note that you start from one. No need to do auto-contrast! # tempbs=bootstrap_contrast( # data=data, # x=x, # y=y, # idx=[current_tuple[0], current_tuple[i]], # statfunction=statfunction, # smoothboot=smoothboot, # reps=reps) # bscontrast.append(tempbs) # contrastList.append(tempbs) # contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0]) # Setting color palette for plotting. 
if pal is None: if 'hue' in kwargs: colorCol = kwargs['hue'] colGrps = data[colorCol].unique() nColors = len(colGrps) else: colorCol = x colGrps = data[x].unique() nColors = len([element for tupl in tuple_in for element in tupl]) plotPal = dict(zip(colGrps, sns.color_palette(n_colors=nColors))) else: plotPal = pal # Ensure summaryLine and summaryBar are not displayed together. if summaryLine is True and summaryBar is True: summaryBar = True summaryLine = False # Turn off summary line if floatContrast is true if floatContrast: summaryLine = False if swarmYlim is None: # get range of _selected groups_. u = list() for t in idx: for i in np.unique(t): u.append(i) u = np.unique(u) tempdat = data[data[x].isin(u)] swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])]) else: swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]]) if contrastYlim is not None: contrastYlim = np.array([contrastYlim[0], contrastYlim[1]]) barWidth = barWidth / 1000 # Not sure why have to reduce the barwidth by this much! if showRawData is True: maxSwarmSpan = 0.25 else: maxSwarmSpan = barWidth # Expand the ylim in both directions. ## Find half of the range of swarm_ylim. swarmrange = swarm_ylim[1] - swarm_ylim[0] pad = 0.1 * swarmrange x2 = np.array([swarm_ylim[0] - pad, swarm_ylim[1] + pad]) swarm_ylim = x2 # plot params if axis_title_size is None: axis_title_size = 25 if yticksize is None: yticksize = 18 if xticksize is None: xticksize = 18 # Set clean style sns.set(style='ticks') axisTitleParams = {'labelsize': axis_title_size} xtickParams = {'labelsize': xticksize} ytickParams = {'labelsize': yticksize} svgParams = {'fonttype': 'none'} rc('axes', **axisTitleParams) rc('xtick', **xtickParams) rc('ytick', **ytickParams) rc('svg', **svgParams) if figsize is None: if len(tuple_in) > 2: figsize = (12, (12 / np.sqrt(2))) else: figsize = (8, (8 / np.sqrt(2))) # Initialise figure, taking into account desired figsize. 
fig = plt.figure(figsize=figsize) # Initialise GridSpec based on `tuple_in` shape. gsMain = gridspec.GridSpec( 1, np.shape(tuple_in)[0], # 1 row; columns based on number of tuples in tuple. width_ratios=widthratio, wspace=0) for gsIdx, current_tuple in enumerate(tuple_in): #### FOR EACH TUPLE IN IDX plotdat = data[data[x].isin(current_tuple)] plotdat[x] = plotdat[x].astype("category") plotdat[x].cat.set_categories(current_tuple, ordered=True, inplace=True) plotdat.sort_values(by=[x]) # Drop all nans. plotdat = plotdat.dropna() # Calculate summaries. summaries = plotdat.groupby([x], sort=True)[y].apply(statfunction) if floatContrast is True: # Use fig.add_subplot instead of plt.Subplot ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False) ax_contrast = ax_raw.twinx() else: # Create subGridSpec with 2 rows and 1 column. subGridSpec = gridspec.GridSpecFromSubplotSpec( 2, 1, subplot_spec=gsMain[gsIdx], wspace=0) # Use plt.Subplot instead of fig.add_subplot ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False) ax_contrast = plt.Subplot(fig, subGridSpec[1, 0], sharex=ax_raw, frame_on=False) # Calculate the boostrapped contrast bscontrast = list() for i in range(1, len(current_tuple)): # Note that you start from one. No need to do auto-contrast! tempbs = bootstrap_contrast( data=data, x=x, y=y, idx=[current_tuple[0], current_tuple[i]], statfunction=statfunction, smoothboot=smoothboot, reps=reps) bscontrast.append(tempbs) contrastList.append(tempbs) contrastListNames.append(current_tuple[i] + ' vs. ' + current_tuple[0]) #### PLOT RAW DATA. if showRawData is True: # Seaborn swarmplot doc says to set custom ylims first. 
ax_raw.set_ylim(swarm_ylim) sw = sns.swarmplot(data=plotdat, x=x, y=y, order=current_tuple, ax=ax_raw, alpha=alpha, palette=plotPal, size=rawMarkerSize, marker=rawMarkerType, **kwargs) if summaryBar is True: bar_raw = sns.barplot(x=summaries.index.tolist(), y=summaries.values, facecolor=summaryBarColor, ax=ax_raw, alpha=summaryBarAlpha) if floatContrast: # Get horizontal offset values. maxXBefore = max(sw.collections[0].get_offsets().T[0]) minXAfter = min(sw.collections[1].get_offsets().T[0]) xposAfter = maxXBefore + floatSwarmSpacer xAfterShift = minXAfter - xposAfter # shift the swarmplots offsetSwarmX(sw.collections[1], -xAfterShift) ## get swarm with largest span, set as max width of each barplot. for i, bar in enumerate(bar_raw.patches): x_width = bar.get_x() width = bar.get_width() centre = x_width + (width / 2.) if i == 0: bar.set_x(centre - maxSwarmSpan / 2.) else: bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.) bar.set_width(maxSwarmSpan) ## Set the ticks locations for ax_raw. ax_raw.xaxis.set_ticks((0, xposAfter)) firstTick = ax_raw.xaxis.get_ticklabels()[0].get_text() secondTick = ax_raw.xaxis.get_ticklabels()[1].get_text() ax_raw.set_xticklabels( [ firstTick, #+' n='+count[firstTick], secondTick ], #+' n='+count[secondTick]], rotation=tickAngle, horizontalalignment=tickAlignment) if summaryLine is True: for i, m in enumerate(summaries): ax_raw.plot( (i - summaryLineWidth, i + summaryLineWidth), # x-coordinates (m, m), color=summaryColour, linestyle=summaryLineStyle) if show95CI is True: sns.barplot(data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95) ax_raw.set_xlabel("") if floatContrast is False: fig.add_subplot(ax_raw) #### PLOT CONTRAST DATA. if len(current_tuple) == 2: # Plot the CIs on the contrast axes. 
plotbootstrap(sw.collections[1], bslist=tempbs, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, offset=floatContrast, color=violinColor, linewidth=1) if floatContrast: # Set reference lines ## First get leftmost limit of left reference group xtemp, _ = np.array(sw.collections[0].get_offsets()).T leftxlim = xtemp.min() ## Then get leftmost limit of right test group xtemp, _ = np.array(sw.collections[1].get_offsets()).T rightxlim = xtemp.min() ## zero line ax_contrast.hlines( 0, # y-coordinates leftxlim, 3.5, # x-coordinates, start and end. linestyle=contrastZeroLineStyle, linewidth=0.75, color=contrastZeroLineColor) ## effect size line ax_contrast.hlines( tempbs['summary'], rightxlim, 3.5, # x-coordinates, start and end. linestyle=contrastEffectSizeLineStyle, linewidth=0.75, color=contrastEffectSizeLineColor) ## If the effect size is positive, shift the right axis up. if float(tempbs['summary']) > 0: rightmin = ax_raw.get_ylim()[0] - float(tempbs['summary']) rightmax = ax_raw.get_ylim()[1] - float(tempbs['summary']) ## If the effect size is negative, shift the right axis down. elif float(tempbs['summary']) < 0: rightmin = ax_raw.get_ylim()[0] + float(tempbs['summary']) rightmax = ax_raw.get_ylim()[1] + float(tempbs['summary']) ax_contrast.set_ylim(rightmin, rightmax) if gsIdx > 0: ax_contrast.set_ylabel('') align_yaxis(ax_raw, tempbs['statistic_ref'], ax_contrast, 0.) else: # Set bottom axes ybounds if contrastYlim is not None: ax_contrast.set_ylim(contrastYlim) # Set xlims so everything is properly visible! swarm_xbounds = ax_raw.get_xbound() ax_contrast.set_xbound( swarm_xbounds[0] - (summaryLineWidth * 1.1), swarm_xbounds[1] + (summaryLineWidth * 1.1)) else: # Plot the CIs on the bottom axes. 
plotbootstrap_hubspoke(bslist=bscontrast, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, linewidth=lineWidth) if floatContrast is False: fig.add_subplot(ax_contrast) if gsIdx > 0: ax_raw.set_ylabel('') ax_contrast.set_ylabel('') # Turn contrastList into a pandas DataFrame, contrastList = pd.DataFrame(contrastList).T contrastList.columns = contrastListNames ######## axesCount = len(fig.get_axes()) ## Loop thru SWARM axes for aesthetic touchups. for i in range(0, axesCount, 2): axx = fig.axes[i] if i != axesCount - 2 and 'hue' in kwargs: # If this is not the final swarmplot, remove the hue legend. axx.legend().set_visible(False) if floatContrast is False: axx.xaxis.set_visible(False) sns.despine(ax=axx, trim=True, bottom=False, left=False) else: sns.despine(ax=axx, trim=True, bottom=True, left=True) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(showAllYAxes) else: # Draw back the lines for the relevant y-axes. # Not entirely sure why I have to do this. drawback_y(axx) # Add zero reference line for swarmplots with bars. if summaryBar is True: axx.add_artist( Line2D((axx.xaxis.get_view_interval()[0], axx.xaxis.get_view_interval()[1]), (0, 0), color='black', linewidth=0.75)) # I don't know why the swarm axes controls the contrast axes ticks.... if showGroupCount: count = data.groupby(x).count()[y] newticks = list() for ix, t in enumerate(axx.xaxis.get_ticklabels()): t_text = t.get_text() nt = t_text + ' n=' + str(count[t_text]) newticks.append(nt) axx.xaxis.set_ticklabels(newticks) if legend is False: axx.legend().set_visible(False) else: if i == axesCount - 2: # the last (rightmost) swarm axes. axx.legend(loc='top right', bbox_to_anchor=(1.1, 1.0), fontsize=legendFontSize, **legendFontProps) ## Loop thru the CONTRAST axes and perform aesthetic touch-ups. 
## Get the y-limits: for j, i in enumerate(range(1, axesCount, 2)): axx = fig.get_axes()[i] if floatContrast is False: xleft, xright = axx.xaxis.get_view_interval() # Draw zero reference line. axx.hlines(y=0, xmin=xleft - 1, xmax=xright + 1, linestyle=contrastZeroLineStyle, linewidth=0.75, color=contrastZeroLineColor) # reset view interval. axx.set_xlim(xleft, xright) # # Draw back x-axis lines connecting ticks. # drawback_x(axx) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(False) else: # Draw back the lines for the relevant y-axes. # Not entirely sure why I have to do this. drawback_y(axx) sns.despine(ax=axx, top=True, right=True, left=False, bottom=False, trim=True) # Rotate tick labels. rotateTicks(axx, tickAngle, tickAlignment) else: # Re-draw the floating axis to the correct limits. lower = np.min(contrastList.ix['diffarray', j]) upper = np.max(contrastList.ix['diffarray', j]) meandiff = contrastList.ix['summary', j] ## Make sure we have zero in the limits. if lower > 0: lower = 0. if upper < 0: upper = 0. ## Get the tick interval from the left y-axis. leftticks = fig.get_axes()[i - 1].get_yticks() tickstep = leftticks[1] - leftticks[0] ## First re-draw of axis with new tick interval axx.yaxis.set_major_locator(MultipleLocator(base=tickstep)) newticks1 = axx.get_yticks() ## Obtain major ticks that comfortably encompass lower and upper. newticks2 = list() for a, b in enumerate(newticks1): if (b >= lower and b <= upper): # if the tick lies within upper and lower, take it. newticks2.append(b) # if the meandiff falls outside of the newticks2 set, add a tick in the right direction. if np.max(newticks2) < meandiff: ind = np.where(newticks1 == np.max(newticks2))[0][ 0] # find out the max tick index in newticks1. newticks2.append(newticks1[ind + 1]) elif meandiff < np.min(newticks2): ind = np.where(newticks1 == np.min(newticks2))[0][ 0] # find out the min tick index in newticks1. 
newticks2.append(newticks1[ind - 1]) newticks2 = np.array(newticks2) newticks2.sort() ## Second re-draw of axis to shrink it to desired limits. axx.yaxis.set_major_locator(FixedLocator(locs=newticks2)) ## Despine the axes. sns.despine(ax=axx, trim=True, bottom=False, right=False, left=True, top=True) # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots. if (axesCount > 2 and contrastShareY is True and floatContrast is False): # Set contrast ylim as max ticks of leftmost swarm axes. if contrastYlim is None: lower = list() upper = list() for c in range(0, len(contrastList.columns)): lower.append(np.min(contrastList.ix['bca_ci_low', c])) upper.append(np.max(contrastList.ix['bca_ci_high', c])) lower = np.min(lower) upper = np.max(upper) else: lower = contrastYlim[0] upper = contrastYlim[1] normalizeContrastY(fig, contrast_ylim=contrastYlim, show_all_yaxes=showAllYAxes) # if (axesCount==2 and # floatContrast is False): # drawback_x(fig.get_axes()[1]) # drawback_y(fig.get_axes()[1]) # if swarmShareY is False: # for i in range(0, axesCount, 2): # drawback_y(fig.get_axes()[i]) # if contrastShareY is False: # for i in range(1, axesCount, 2): # if floatContrast is True: # sns.despine(ax=fig.get_axes()[i], # top=True, right=False, left=True, bottom=True, # trim=True) # else: # sns.despine(ax=fig.get_axes()[i], trim=True) # Zero gaps between plots on the same row, if floatContrast is False if (floatContrast is False and showAllYAxes is False): gsMain.update(wspace=0.) else: # Tight Layout! gsMain.tight_layout(fig) # And we're all done. rcdefaults() # restore matplotlib defaults. sns.set() # restore seaborn defaults. return fig, contrastList
def _normalise_paired_idx(data, x, idx):
    """Return `idx` as a tuple of (before, after) pairs, validating its shape.

    `idx` may be None (the first two sorted levels of ``data[x]`` are used),
    a single pair of level names, or a sequence of such pairs.

    Raises:
        ValueError: if a comparison does not contain exactly two levels.
        TypeError: if `idx` mixes level names and pairs.
    """
    if idx is None:
        levels = np.unique(data[x])[0:2]
        if len(levels) != 2:
            raise ValueError(
                "Column {!r} needs at least 2 levels for a paired "
                "contrast.".format(x))
        # Wrap the pair so that iterating over the result yields one
        # comparison, not one level name at a time.
        return (tuple(levels),)

    if all(isinstance(element, str) for element in idx):
        # A single comparison given as a flat list/tuple of two names.
        if len(idx) != 2:
            raise ValueError("{} does not have length 2.".format(idx))
        return (tuple(idx),)

    if all(isinstance(element, (tuple, list)) for element in idx):
        # A multi-plot: one (before, after) pair per panel.
        wrong_length = [element for element in idx if len(element) != 2]
        if wrong_length:
            raise ValueError(
                "{} does not have length 2.".format(wrong_length[0]))
        return tuple(tuple(element) for element in idx)

    raise TypeError(
        "`idx` must be either a pair of level names, or a sequence of "
        "such pairs; got {!r}.".format(idx))


def _encompassing_ticks(candidate_ticks, lower, upper, meandiff):
    """Pick the candidate ticks inside [lower, upper], extended to cover meandiff.

    Returns a sorted numpy array. If no candidate lies inside the interval,
    all candidates are returned so the axis keeps usable ticks.
    """
    candidate_ticks = np.asarray(candidate_ticks)
    chosen = [tick for tick in candidate_ticks if lower <= tick <= upper]
    if not chosen:
        return np.sort(candidate_ticks)

    # If the summary difference falls outside the chosen ticks, add the next
    # candidate tick in that direction (when there is one).
    if np.max(chosen) < meandiff:
        ind = np.where(candidate_ticks == np.max(chosen))[0][0]
        if ind + 1 < len(candidate_ticks):
            chosen.append(candidate_ticks[ind + 1])
    elif meandiff < np.min(chosen):
        ind = np.where(candidate_ticks == np.min(chosen))[0][0]
        if ind > 0:
            chosen.append(candidate_ticks[ind - 1])

    chosen = np.array(chosen)
    chosen.sort()
    return chosen


def pairedcontrast(data, x, y, idcol, reps=3000, statfunction=None,
                   idx=None, figsize=None, beforeAfterSpacer=0.01,
                   violinWidth=0.005, floatOffset=0.05, showRawData=False,
                   showAllYAxes=False, floatContrast=True, smoothboot=False,
                   floatViolinOffset=None, showConnections=True,
                   summaryBar=False, contrastYlim=None, swarmYlim=None,
                   barWidth=0.005, rawMarkerSize=8, rawMarkerType='o',
                   summaryMarkerSize=10, summaryMarkerType='o',
                   summaryBarColor='grey', meansSummaryLineStyle='solid',
                   contrastZeroLineStyle='solid',
                   contrastEffectSizeLineStyle='solid',
                   contrastZeroLineColor='black',
                   contrastEffectSizeLineColor='black', pal=None,
                   legendLoc=2, legendFontSize=12, legendMarkerScale=1,
                   axis_title_size=None, yticksize=None, xticksize=None,
                   tickAngle=45, tickAlignment='right', **kwargs):
    """Draw a paired (before/after) contrast plot with a bootstrapped difference.

    For every (before, after) pair in `idx`, the raw observations are drawn,
    matching observations (same `idcol` value) are optionally joined by a
    line, and the bootstrapped distribution of the paired differences is
    drawn as a half-violin with its summary and confidence interval.

    Parameters:
        data: pandas DataFrame in long format. Rows with missing values are
            dropped before plotting.
        x: name of the column holding the group levels.
        y: name of the column holding the measured values.
        idcol: name of the column identifying each paired subject.
        reps: number of resamples, passed to `bootstrap`.
        statfunction: summary function; defaults to `np.mean`.
        idx: a pair of level names, or a sequence of pairs (one panel each).
            If None, the first two sorted levels of ``data[x]`` are used.
        figsize: figure size; defaults to (6, 6), or a wider figure when
            more than two pairs are given.
        beforeAfterSpacer: horizontal gap between the two groups. Forced to
            1 when `showRawData` is True.
        violinWidth: width of the violin. Set to the swarm span when
            `showRawData` is True.
        floatOffset: currently unused.
        showRawData: if True draw a swarmplot; otherwise a stripplot is
            drawn only to obtain point positions and then hidden.
        showAllYAxes: if False (and `floatContrast` is False), hide the
            contrast y-axis of every panel except the first.
        floatContrast: if True the difference is drawn on a twin axis to the
            right of the raw data; otherwise on a separate axis below it.
        smoothboot: passed to `bootstrap`.
        floatViolinOffset: gap between the 'after' group and the floating
            violin; defaults to ``beforeAfterSpacer / 2``.
        showConnections: if True, join each before/after pair with a line.
        summaryBar: if True, draw a bar at each group mean.
        contrastYlim, swarmYlim: optional (low, high) y-limits for the
            contrast and raw-data axes.
        barWidth: width of the summary bars when `showRawData` is False.
        rawMarkerSize, rawMarkerType: marker options for the swarmplot.
        summaryMarkerSize, summaryMarkerType: marker options for the
            difference summary.
        summaryBarColor: face colour of the summary bars.
        meansSummaryLineStyle: currently unused.
        contrastZeroLineStyle, contrastZeroLineColor: style of the zero
            reference line on the contrast axes.
        contrastEffectSizeLineStyle, contrastEffectSizeLineColor: style of
            the effect-size reference line (floating layout only).
        pal: palette mapping levels to colours; built from the seaborn
            default palette when None.
        legendLoc, legendFontSize, legendMarkerScale: legend options, used
            only when `hue` is passed in `kwargs`.
        axis_title_size, yticksize, xticksize: font sizes set through
            matplotlib `rc`; default to 15, 12 and 12.
        tickAngle, tickAlignment: rotation and alignment of x tick labels
            (floating layout only).
        **kwargs: forwarded to the seaborn swarmplot/stripplot call.

    Returns:
        (fig, contrastList): the matplotlib Figure, and a DataFrame with one
        column per comparison (named '<after> v.s. <before>') holding the
        bootstrap result plus a 'ttest_pval' entry.

    Raises:
        ValueError: if a comparison in `idx` does not have exactly 2 levels.
        TypeError: if `idx` mixes level names and pairs.

    Note:
        Matplotlib rc settings are modified while plotting; on completion
        `rcdefaults()` and `sns.set()` are called.
    """
    # Preliminaries.
    data = data.dropna()

    # Validate `idx` before touching any global plotting state, so that bad
    # input does not leave the rc settings modified.
    idx = _normalise_paired_idx(data, x, idx)

    # Plot params.
    if axis_title_size is None:
        axis_title_size = 15
    if yticksize is None:
        yticksize = 12
    if xticksize is None:
        xticksize = 12

    rc('axes', **{'labelsize': axis_title_size})
    rc('xtick', **{'labelsize': xticksize})
    rc('ytick', **{'labelsize': yticksize})

    if floatViolinOffset is None:
        floatViolinOffset = beforeAfterSpacer / 2
    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])
    if swarmYlim is not None:
        swarmYlim = np.array([swarmYlim[0], swarmYlim[1]])

    ## The palette is defined on all the levels of the colouring column, so
    ## that if the same DataFrame is re-used across different plots, the
    ## colour identity of each group is maintained.
    if 'hue' in kwargs:
        colour_col = kwargs['hue']
    else:
        colour_col = x
    if 'color' not in kwargs and 'hue' not in kwargs:
        kwargs['color'] = 'k'

    if pal is None:
        colour_levels = data[colour_col].unique()
        pal = dict(zip(colour_levels,
                       sns.color_palette(n_colors=len(colour_levels))))

    # Initialise figure.
    if figsize is None:
        if len(idx) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (6, 6)
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec: 1 row; one column per comparison.
    gsMain = gridspec.GridSpec(1, np.shape(idx)[0])

    # Set default statfunction.
    if statfunction is None:
        statfunction = np.mean

    # Collect the bootstrap result of every comparison.
    contrastList = list()
    contrastListNames = list()

    for gsIdx, xlevs in enumerate(idx):
        ## Pivot to get one row per subject, one column per level.
        data_pivot = data.pivot_table(index=idcol, columns=x, values=y)

        # Start plotting!!
        if floatContrast is True:
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            gsSubGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx])
            ax_raw = plt.Subplot(fig, gsSubGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig, gsSubGridSpec[1, 0],
                                      sharex=ax_raw, frame_on=False)

        ## Plot raw data as swarmplot or stripplot.
        if showRawData is True:
            swarm_raw = sns.swarmplot(data=data, x=x, y=y, order=xlevs,
                                      ax=ax_raw, palette=pal,
                                      size=rawMarkerSize,
                                      marker=rawMarkerType, **kwargs)
        else:
            swarm_raw = sns.stripplot(data=data, x=x, y=y, order=xlevs,
                                      ax=ax_raw, palette=pal, **kwargs)
        swarm_raw.set_ylim(swarmYlim)

        ## Get some details about the raw data.
        before_points = swarm_raw.collections[0]
        after_points = swarm_raw.collections[1]
        maxXBefore = max(before_points.get_offsets().T[0])
        minXAfter = min(after_points.get_offsets().T[0])
        if showRawData is True:
            beforeAfterSpacer = 1
        xposAfter = maxXBefore + beforeAfterSpacer
        xAfterShift = minXAfter - xposAfter

        ## Shift the 'after' points closer for aesthetic purposes.
        offsetSwarmX(after_points, -xAfterShift)

        ## Convenient signifiers for column names.
        befX = str(xlevs[0] + '_x')
        aftX = str(xlevs[1] + '_x')

        ## DataFrame of the 'before' group: plotted position and colour.
        before_xy = before_points.get_offsets().T
        before_rgb = before_points.get_facecolors().T
        x1 = pd.DataFrame({befX: pd.Series(before_xy[0]),
                           xlevs[0]: pd.Series(before_xy[1]),
                           '_R_': pd.Series(before_rgb[0]),
                           '_G_': pd.Series(before_rgb[1]),
                           '_B_': pd.Series(before_rgb[2]),
                           })
        ## Join the RGB columns into a tuple, then assign to a column.
        x1['_hue_'] = x1[['_R_', '_G_', '_B_']].apply(tuple, axis=1)
        # Sorting both frames by value lets the plotted points take the
        # subject ids of the pivoted data.
        x1 = x1.sort_values(by=xlevs[0])
        x1.index = data_pivot.sort_values(by=xlevs[0]).index

        ## DataFrame of the 'after' group.
        after_xy = after_points.get_offsets().T
        x2 = pd.DataFrame({aftX: pd.Series(after_xy[0]),
                           xlevs[1]: pd.Series(after_xy[1])})
        x2 = x2.sort_values(by=xlevs[1])
        x2.index = data_pivot.sort_values(by=xlevs[1]).index

        ## Join x1 and x2, on both their indexes.
        plotPoints = x1.merge(x2, left_index=True, right_index=True,
                              how='outer')

        ## Add the hue column if hue argument was passed.
        if 'hue' in kwargs:
            h = kwargs['hue']
            plotPoints[h] = data.pivot(index=idcol, columns=x,
                                       values=h)[xlevs[0]]
            swarm_raw.legend(loc=legendLoc, fontsize=legendFontSize,
                             markerscale=legendMarkerScale)

        ## Plot the lines joining each 'before' point to its 'after' point.
        if showConnections is True:
            for row_id in plotPoints.index:
                ax_raw.plot([plotPoints.loc[row_id, befX],
                             plotPoints.loc[row_id, aftX]],
                            [plotPoints.loc[row_id, xlevs[0]],
                             plotPoints.loc[row_id, xlevs[1]]],
                            linestyle='solid',
                            color=plotPoints.loc[row_id, '_hue_'],
                            linewidth=0.75,
                            alpha=0.75)

        ## Hide the raw swarmplot data if so desired.
        if showRawData is False:
            before_points.set_visible(False)
            after_points.set_visible(False)

        if showRawData is True:
            maxSwarmSpan = 0.5
        else:
            maxSwarmSpan = barWidth

        ## Plot Summary Bar.
        if summaryBar is True:
            # Select `y` before averaging so that non-numeric columns
            # (ids, hue labels) are never aggregated.
            means = data.groupby(x, sort=True)[y].mean()

            ## Draw summary bar.
            bar_raw = sns.barplot(x=means.index, y=means.values,
                                  order=xlevs, ax=ax_raw, ci=0,
                                  facecolor=summaryBarColor, alpha=0.25)

            ## Draw zero reference line.
            ax_raw.add_artist(Line2D(
                (ax_raw.xaxis.get_view_interval()[0],
                 ax_raw.xaxis.get_view_interval()[1]),
                (0, 0),
                color='black', linewidth=0.75))

            ## Re-centre each bar on its (possibly shifted) group and give
            ## it the swarm span as width.
            for bar_index, bar in enumerate(bar_raw.patches):
                centre = bar.get_x() + bar.get_width() / 2.
                if bar_index == 0:
                    bar.set_x(centre - maxSwarmSpan / 2.)
                else:
                    bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                bar.set_width(maxSwarmSpan)

        # Get the extents of the plotted points.
        beforeRaw = pd.DataFrame(before_points.get_offsets())
        afterRaw = pd.DataFrame(after_points.get_offsets())
        before_leftx = min(beforeRaw[0])
        after_rightx = max(afterRaw[0])
        # Summary of the 'before' (reference) group; the zero of the
        # floating axis is aligned with it below.
        reference_summary = statfunction(beforeRaw[1])

        # Calculate the summary difference and CI.
        plotPoints['delta_y'] = plotPoints[xlevs[1]] - plotPoints[xlevs[0]]
        bootsDelta = bootstrap(plotPoints['delta_y'],
                               statfunction=statfunction,
                               smoothboot=smoothboot,
                               reps=reps)
        summDelta = bootsDelta['summary']
        lowDelta = bootsDelta['bca_ci_low']
        highDelta = bootsDelta['bca_ci_high']

        # Set new xpos for delta violin.
        if floatContrast is True:
            if showRawData is False:
                xposPlusViolin = deltaSwarmX = after_rightx + floatViolinOffset
            else:
                xposPlusViolin = deltaSwarmX = after_rightx + maxSwarmSpan
        else:
            xposPlusViolin = xposAfter
        if showRawData is True:
            # With raw data shown, set the violin width to the swarm span.
            violinWidth = maxSwarmSpan

        xmaxPlot = xposPlusViolin + violinWidth

        # Plot the summary measure.
        ax_contrast.plot(xposPlusViolin, summDelta,
                         marker=summaryMarkerType,
                         markerfacecolor='k',
                         markersize=summaryMarkerSize,
                         alpha=0.75)

        # Plot the CI.
        ax_contrast.plot([xposPlusViolin, xposPlusViolin],
                         [lowDelta, highDelta],
                         color='k', alpha=0.75, linestyle='solid')

        # Plot the violin-plot.
        v = ax_contrast.violinplot(bootsDelta['stat_array'],
                                   [xposPlusViolin],
                                   widths=violinWidth,
                                   showextrema=False,
                                   showmeans=False)
        halfviolin(v, half='right', color='k')

        # Remove left axes x-axis title.
        ax_raw.set_xlabel("")
        # Remove floating axes y-axis title.
        ax_contrast.set_ylabel("")

        # Set proper x-limits.
        ax_raw.set_xlim(before_leftx - beforeAfterSpacer / 2, xmaxPlot)
        ax_raw.get_xaxis().set_view_interval(
            before_leftx - beforeAfterSpacer / 2,
            after_rightx + beforeAfterSpacer / 2)
        ax_contrast.set_xlim(ax_raw.get_xlim())

        if floatContrast is True:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            # Make sure they have the same y-limits.
            ax_contrast.set_ylim(ax_raw.get_ylim())

            ## Set the tick labels!
            ax_raw.set_xticklabels(xlevs,
                                   rotation=tickAngle,
                                   horizontalalignment=tickAlignment)

            # Align the left axes and the floating axes.
            # NOTE: the lowest y-value must not be stored in `y` here; `y`
            # is the column name needed by the next comparison.
            align_yaxis(ax_raw, statfunction(plotPoints[xlevs[0]]),
                        ax_contrast, 0)

            # Add label to floating axes. But on ax_raw!
            ax_raw.text(x=deltaSwarmX,
                        y=ax_raw.get_yaxis().get_view_interval()[0],
                        horizontalalignment='left',
                        s='Difference',
                        fontsize=15)

            # Set reference lines.
            ## zero line
            ax_contrast.hlines(0,
                               ax_contrast.xaxis.get_majorticklocs()[0],
                               ax_raw.xaxis.get_view_interval()[1],
                               linestyle=contrastZeroLineStyle,
                               linewidth=0.75,
                               color=contrastZeroLineColor)

            ## effect size line
            ax_contrast.hlines(summDelta,
                               ax_contrast.xaxis.get_majorticklocs()[1],
                               ax_raw.xaxis.get_view_interval()[1],
                               linestyle=contrastEffectSizeLineStyle,
                               linewidth=0.75,
                               color=contrastEffectSizeLineColor)

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, reference_summary, ax_contrast, 0.)
        else:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            fig.add_subplot(ax_raw)
            fig.add_subplot(ax_contrast)
        ax_contrast.set_ylim(contrastYlim)

        # Calculate p-values.
        # 1-sample t-test to see if the mean of the difference differs from 0.
        ttestresult = ttest_1samp(plotPoints['delta_y'], popmean=0)[1]
        bootsDelta['ttest_pval'] = ttestresult
        contrastList.append(bootsDelta)
        contrastListNames.append(str(xlevs[1]) + ' v.s. ' + str(xlevs[0]))

    # Turn contrastList into a pandas DataFrame.
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Now iterate through the contrast axes (odd positions) to tidy ticks.
    all_axes = fig.get_axes()
    for j, i in enumerate(range(1, len(all_axes), 2)):
        axx = all_axes[i]
        # Column j of contrastList belongs to the j-th comparison.
        contrast_result = contrastList.iloc[:, j]

        ## Get max and min of the bootstrap distribution.
        lower = np.min(contrast_result['stat_array'])
        upper = np.max(contrast_result['stat_array'])
        meandiff = contrast_result['summary']

        ## Make sure we have zero in the limits.
        if lower > 0:
            lower = 0.
        if upper < 0:
            upper = 0.

        ## The tick distance on the raw axes is re-used on the contrast axes.
        rawAxesTicks = all_axes[i - 1].yaxis.get_majorticklocs()
        rawAxesTickDist = rawAxesTicks[1] - rawAxesTicks[0]

        ## First re-draw of axis with new tick interval.
        axx.yaxis.set_major_locator(MultipleLocator(rawAxesTickDist))
        newticks1 = axx.get_yticks()

        if floatContrast is False:
            if showAllYAxes is False and i in range(2, len(all_axes)):
                axx.get_yaxis().set_visible(showAllYAxes)
            else:
                ## Keep only the ticks that encompass the distribution.
                newticks2 = _encompassing_ticks(newticks1, lower, upper,
                                                meandiff)
                axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Draw zero reference line.
            axx.hlines(y=0,
                       xmin=axx.get_xaxis().get_view_interval()[0],
                       xmax=axx.get_xaxis().get_view_interval()[1],
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)

            sns.despine(ax=axx, trim=True,
                        bottom=False, right=True,
                        left=False, top=True)

            ## Draw back the lines for the relevant y-axes.
            drawback_y(axx)

            ## Draw back the lines for the relevant x-axes.
            drawback_x(axx)

        elif floatContrast is True:
            ## Keep only the ticks that encompass the distribution.
            newticks2 = _encompassing_ticks(newticks1, lower, upper,
                                            meandiff)

            ## Re-draw the axis.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine and trim the axes.
            sns.despine(ax=axx, trim=True,
                        bottom=False, right=False,
                        left=True, top=True)

    # Loop through the raw data axes (even positions) and despine them.
    for i in range(0, len(all_axes), 2):
        raw_axes = all_axes[i]
        if floatContrast is True:
            sns.despine(ax=raw_axes, trim=True, right=True)
        else:
            sns.despine(ax=raw_axes, trim=True, bottom=True, right=True)
            raw_axes.get_xaxis().set_visible(False)

        # Draw back the lines for the relevant y-axes.
        ymin = raw_axes.get_yaxis().get_majorticklocs()[0]
        ymax = raw_axes.get_yaxis().get_majorticklocs()[-1]
        left_x, _ = raw_axes.get_xaxis().get_view_interval()
        raw_axes.add_artist(Line2D((left_x, left_x), (ymin, ymax),
                                   color='black', linewidth=1.5))

    # Zero gaps between plots on the same row, if floatContrast is False.
    if floatContrast is False and showAllYAxes is False:
        gsMain.update(wspace=0)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.

    return fig, contrastList
def contrastplot( data, x=None, y=None, idx=None, idcol=None, alpha=0.75, axis_title_size=None, ci=95, contrastShareY=True, contrastEffectSizeLineStyle='solid', contrastEffectSizeLineColor='black', contrastYlim=None, contrastZeroLineStyle='solid', contrastZeroLineColor='black', connectPairs=True, effectSizeYLabel="Effect Size", figsize=None, floatContrast=True, floatSwarmSpacer=0.2, heightRatio=(1, 1), lineWidth=2, legend=True, legendFontSize=14, legendFontProps={}, paired=False, pairedDeltaLineAlpha=0.3, pairedDeltaLineWidth=1.2, pal=None, rawMarkerSize=8, rawMarkerType='o', reps=3000, showGroupCount=True, showCI=False, showAllYAxes=False, showRawData=True, smoothboot=False, statfunction=None, summaryBar=False, summaryBarColor='grey', summaryBarAlpha=0.25, summaryColour='black', summaryLine=True, summaryLineStyle='solid', summaryLineWidth=0.25, summaryMarkerSize=10, summaryMarkerType='o', swarmShareY=True, swarmYlim=None, tickAngle=45, tickAlignment='right', violinOffset=0.375, violinWidth=0.2, violinColor='k', xticksize=None, yticksize=None, **kwargs): '''Takes a pandas DataFrame and produces a contrast plot: either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot. Paired and unpaired options available. Keyword arguments: data: pandas DataFrame x: string column name containing categories to be plotted on the x-axis. y: string column name containing values to be plotted on the y-axis. idx: tuple flxible declaration of groupwise comparisons. idcol: string for paired plots. alpha: float alpha (transparency) of raw swarmed data points. 
axis_title_size=None ci=95 contrastShareY=True contrastEffectSizeLineStyle='solid' contrastEffectSizeLineColor='black' contrastYlim=None contrastZeroLineStyle='solid' contrastZeroLineColor='black' effectSizeYLabel="Effect Size" figsize=None floatContrast=True floatSwarmSpacer=0.2 heightRatio=(1,1) lineWidth=2 legend=True legendFontSize=14 legendFontProps={} paired=False pairedDeltaLineAlpha=0.3 pairedDeltaLineWidth=1.2 pal=None rawMarkerSize=8 rawMarkerType='o' reps=3000 showGroupCount=True showCI=False showAllYAxes=False showRawData=True smoothboot=False statfunction=None summaryBar=False summaryBarColor='grey' summaryBarAlpha=0.25 summaryColour='black' summaryLine=True summaryLineStyle='solid' summaryLineWidth=0.25 summaryMarkerSize=10 summaryMarkerType='o' swarmShareY=True swarmYlim=None tickAngle=45 tickAlignment='right' violinOffset=0.375 violinWidth=0.2 violinColor='k' xticksize=None yticksize=None Returns: An matplotlib Figure. Organization of figure Axes. ''' # Check that `data` is a pandas dataframe if 'DataFrame' not in str(type(data)): raise TypeError("The object passed to the command is not not a pandas DataFrame.\ Please convert it to a pandas DataFrame.") # make sure that at least x, y, and idx are specified. if x is None and y is None and idx is None: raise ValueError('You need to specify `x` and `y`, or `idx`. Neither has been specifed.') if x is None: # if x is not specified, assume this is a 'wide' dataset, with each idx being the name of a column. datatype='wide' # Check that the idx are legit columns. all_idx=np.unique([element for tupl in idx for element in tupl]) # # melt the data. # data=pd.melt(data,value_vars=all_idx) # x='variable' # y='value' else: # if x is specified, assume this is a 'long' dataset with each row corresponding to one datapoint. datatype='long' # make sure y is not none. if y is None: raise ValueError("`paired` is false, but no y-column given.") # Calculate Ns. 
counts=data.groupby(x)[y].count() # Get and set levels of data[x] if paired is True: violinWidth=0.1 # # Calculate Ns--which should be simply the number of rows in data. # counts=len(data) # is idcol supplied? if idcol is None and datatype=='long': raise ValueError('`idcol` has not been supplied but a paired plot is desired; please specify the `idcol`.') if idx is not None: # check if multi-plot or not if all(isinstance(element, str) for element in idx): # check that every idx is a column name. idx_not_in_cols=[n for n in idx if n not in data[x].unique()] if len(idx_not_in_cols)!=0: raise ValueError(str(idx_not_in_cols)+" cannot be found in the columns of `data`.") # data_wide_cols=[n for n in idx if n in data.columns] # if idx is supplied but not a multiplot (ie single list or tuple) if len(idx) != 2: raise ValueError(idx+" does not have length 2.") else: tuple_in=(tuple(idx, ),) widthratio=[1] elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! idx_not_in_cols=[n for tup in idx for n in tup if n not in data[x].unique()] if len(idx_not_in_cols)!=0: raise ValueError(str(idx_not_in_cols)+" cannot be found in the column "+x) # data_wide_cols=[n for tup in idx for n in tup if n in data.columns] if ( any(len(element) != 2 for element in idx) ): # If any of the tuples does not contain exactly 2 elements. raise ValueError(element+" does not have length 2.") # Make sure the widthratio of the seperate multiplot corresponds to how # many groups there are in each one. tuple_in=idx widthratio=[] for i in tuple_in: widthratio.append(len(i)) elif idx is None: raise ValueError('Please specify idx.') showRawData=False # Just show lines, do not show data. showCI=False # wait till I figure out how to plot this for sns.barplot. if datatype=='long': if idx is None: ## If `idx` is not specified, just take the FIRST TWO levels alphabetically. 
tuple_in=tuple(np.sort(np.unique(data[x]))[0:2],) # pivot the dataframe if it is long! data_pivot=data.pivot_table(index = idcol, columns = x, values = y) elif paired is False: if idx is None: widthratio=[1] tuple_in=( tuple(data[x].unique()) ,) if len(tuple_in[0])>2: floatContrast=False else: if all(isinstance(element, str) for element in idx): # if idx is supplied but not a multiplot (ie single list or tuple) # check all every idx specified can be found in data[x] idx_not_in_x=[n for n in idx if n not in data[x].unique()] if len(idx_not_in_x)!=0: raise ValueError(str(idx_not_in_x)+" cannot be found in the column "+x) tuple_in=(idx, ) widthratio=[1] if len(idx)>2: floatContrast=False elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! idx_not_in_x=[n for tup in idx for n in tup if n not in data[x].unique()] if len(idx_not_in_x)!=0: raise ValueError(str(idx_not_in_x)+" cannot be found in the column "+x) tuple_in=idx if ( any(len(element)>2 for element in tuple_in) ): # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False. floatContrast=False # Make sure the widthratio of the seperate multiplot corresponds to how # many groups there are in each one. widthratio=[] for i in tuple_in: widthratio.append(len(i)) else: raise TypeError("The object passed to `idx` consists of a mixture of single strings and tuples. \ Please make sure that `idx` is either a tuple of column names, or a tuple of tuples, for plotting.") # Ensure summaryLine and summaryBar are not displayed together. if summaryLine is True and summaryBar is True: summaryBar=True summaryLine=False # Turn off summary line if floatContrast is true if floatContrast: summaryLine=False # initialise statfunction if statfunction == None: statfunction=np.mean # Create list to collect all the contrast DataFrames generated. 
contrastList=list() contrastListNames=list() # Setting color palette for plotting. if pal is None: if 'hue' in kwargs: colorCol=kwargs['hue'] if colorCol not in data.columns: raise ValueError(colorCol+' is not a column name.') colGrps=data[colorCol].unique()#.tolist() plotPal=dict( zip( colGrps, sns.color_palette(n_colors=len(colGrps)) ) ) else: if datatype=='long': colGrps=data[x].unique()#.tolist() plotPal=dict( zip( colGrps, sns.color_palette(n_colors=len(colGrps)) ) ) if datatype=='wide': plotPal=np.repeat('k',len(data)) else: if datatype=='long': plotPal=pal if datatype=='wide': plotPal=list(map(lambda x:pal[x], data[hue])) if swarmYlim is None: # get range of _selected groups_. # u = list() # for t in tuple_in: # for i in np.unique(t): # u.append(i) # u = np.unique(u) u=np.unique([element for tupl in tuple_in for element in tupl]) if datatype=='long': tempdat=data[data[x].isin(u)] swarm_ylim=np.array([np.min(tempdat[y]), np.max(tempdat[y])]) if datatype=='wide': allMin=list() allMax=list() for col in u: allMin.append(np.min(data[col])) allMax.append(np.max(data[col])) swarm_ylim=np.array( [np.min(allMin),np.max(allMax)] ) swarm_ylim=np.round(swarm_ylim) else: swarm_ylim=np.array([swarmYlim[0],swarmYlim[1]]) if summaryBar is True: lims=swarm_ylim # check that 0 lies within the desired limits. # if not, extend (upper or lower) limit to zero. if 0 not in range( int(round(lims[0])),int(round(lims[1])) ): # turn swarm_ylim to integer range. # check if all negative:. if lims[0]<0. and lims[1]<0.: swarm_ylim=np.array([np.min(lims),0.]) # check if all positive. elif lims[0]>0. 
and lims[1]>0.: swarm_ylim=np.array([0.,np.max(lims)]) if contrastYlim is not None: contrastYlim=np.array([contrastYlim[0],contrastYlim[1]]) # plot params if axis_title_size is None: axis_title_size=27 if yticksize is None: yticksize=22 if xticksize is None: xticksize=22 # Set clean style sns.set(style='ticks') axisTitleParams={'labelsize' : axis_title_size} xtickParams={'labelsize' : xticksize} ytickParams={'labelsize' : yticksize} svgParams={'fonttype' : 'none'} rc('axes', **axisTitleParams) rc('xtick', **xtickParams) rc('ytick', **ytickParams) rc('svg', **svgParams) if figsize is None: if len(tuple_in)>2: figsize=(12,(12/np.sqrt(2))) else: figsize=(8,(8/np.sqrt(2))) # calculate CI. if ci<0 or ci>100: raise ValueError('ci should be between 0 and 100.') alpha_level=(100.-ci)/100. # Initialise figure, taking into account desired figsize. fig=plt.figure(figsize=figsize) # Initialise GridSpec based on `tuple_in` shape. gsMain=gridspec.GridSpec( 1, np.shape(tuple_in)[0], # 1 row; columns based on number of tuples in tuple. width_ratios=widthratio, wspace=0 ) for gsIdx, current_tuple in enumerate(tuple_in): #### FOR EACH TUPLE IN IDX if datatype=='long': plotdat=data[data[x].isin(current_tuple)] plotdat[x]=plotdat[x].astype("category") plotdat[x].cat.set_categories( current_tuple, ordered=True, inplace=True) plotdat.sort_values(by=[x]) # # Drop all nans. # plotdat.dropna(inplace=True) summaries=plotdat.groupby(x)[y].apply(statfunction) if datatype=='wide': plotdat=data[list(current_tuple)] summaries=statfunction(plotdat) plotdat=pd.melt(plotdat) ##### NOW I HAVE MELTED THE WIDE DATA. if floatContrast is True: # Use fig.add_subplot instead of plt.Subplot. ax_raw=fig.add_subplot(gsMain[gsIdx], frame_on=False) ax_contrast=ax_raw.twinx() else: # Create subGridSpec with 2 rows and 1 column. 
subGridSpec=gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=gsMain[gsIdx], wspace=0) # Use plt.Subplot instead of fig.add_subplot ax_raw=plt.Subplot(fig, subGridSpec[0, 0], frame_on=False) ax_contrast=plt.Subplot(fig, subGridSpec[1, 0], sharex=ax_raw, frame_on=False) # Calculate the boostrapped contrast bscontrast=list() if paired is False: tempplotdat=plotdat[[x,y]] # only select the columns used for x and y plotting. for i in range (1, len(current_tuple)): # Note that you start from one. No need to do auto-contrast! # if datatype=='long':aas tempbs=bootstrap_contrast( data=tempplotdat.dropna(), x=x, y=y, idx=[current_tuple[0], current_tuple[i]], statfunction=statfunction, smoothboot=smoothboot, alpha_level=alpha_level, reps=reps) bscontrast.append(tempbs) contrastList.append(tempbs) contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0]) #### PLOT RAW DATA. ax_raw.set_ylim(swarm_ylim) # ax_raw.yaxis.set_major_locator(MaxNLocator(n_bins='auto')) # ax_raw.yaxis.set_major_locator(LinearLocator()) if paired is False and showRawData is True: # Seaborn swarmplot doc says to set custom ylims first. sw=sns.swarmplot( data=plotdat, x=x, y=y, order=current_tuple, ax=ax_raw, alpha=alpha, palette=plotPal, size=rawMarkerSize, marker=rawMarkerType, **kwargs) if floatContrast: # Get horizontal offset values. maxXBefore=max(sw.collections[0].get_offsets().T[0]) minXAfter=min(sw.collections[1].get_offsets().T[0]) xposAfter=maxXBefore+floatSwarmSpacer xAfterShift=minXAfter-xposAfter # shift the (second) swarmplot offsetSwarmX(sw.collections[1], -xAfterShift) # shift the tick. ax_raw.set_xticks([0.,1-xAfterShift]) elif paired is True: if showRawData is True: sw=sns.swarmplot(data=plotdat, x=x, y=y, order=current_tuple, ax=ax_raw, alpha=alpha, palette=plotPal, size=rawMarkerSize, marker=rawMarkerType, **kwargs) if connectPairs is True: # Produce paired plot with lines. 
before=plotdat[plotdat[x]==current_tuple[0]][y].tolist() after=plotdat[plotdat[x]==current_tuple[1]][y].tolist() linedf=pd.DataFrame( {'before':before, 'after':after} ) # to get color, need to loop thru each line and plot individually. for ii in range(0,len(linedf)): ax_raw.plot( [0,0.25], [ linedf.loc[ii,'before'], linedf.loc[ii,'after'] ], linestyle='solid', linewidth=pairedDeltaLineWidth, color=plotPal[current_tuple[0]], alpha=pairedDeltaLineAlpha, ) ax_raw.set_xlim(-0.25,0.5) ax_raw.set_xticks([0,0.25]) ax_raw.set_xticklabels([current_tuple[0],current_tuple[1]]) # if swarmYlim is None: # # if swarmYlim was not specified, tweak the y-axis # # to show all the data without losing ticks and range. # ## Get all yticks. # axxYTicks=ax_raw.yaxis.get_majorticklocs() # ## Get ytick interval. # YTickInterval=axxYTicks[1]-axxYTicks[0] # ## Get current ylim # currentYlim=ax_raw.get_ylim() # ## Extend ylim by adding a fifth of the tick interval as spacing at both ends. # ax_raw.set_ylim( # currentYlim[0]-(YTickInterval/5), # currentYlim[1]+(YTickInterval/5) # ) # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto')) # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto')) # ax_raw.yaxis.set_major_locator(LinearLocator()) if summaryBar is True: if paired is False: bar_raw=sns.barplot( x=summaries.index.tolist(), y=summaries.values, facecolor=summaryBarColor, ax=ax_raw, alpha=summaryBarAlpha) if floatContrast is True: maxSwarmSpan=2/10. xlocs=list() for i, bar in enumerate(bar_raw.patches): x_width=bar.get_x() width=bar.get_width() centre=x_width + (width/2.) if i == 0: bar.set_x(centre-maxSwarmSpan/2.) xlocs.append(centre) else: bar.set_x(centre-xAfterShift-maxSwarmSpan/2.) xlocs.append(centre-xAfterShift) bar.set_width(maxSwarmSpan) ax_raw.set_xticks(xlocs) # make sure xticklocs match the barplot. elif floatContrast is False: maxSwarmSpan=4/10. xpos=ax_raw.xaxis.get_majorticklocs() for i, bar in enumerate(bar_raw.patches): bar.set_x(xpos[i]-maxSwarmSpan/2.) 
bar.set_width(maxSwarmSpan) else: # if paired is true ax_raw.bar([0,0.25], [ statfunction(plotdat[current_tuple[0]]), statfunction(plotdat[current_tuple[1]]) ], color=summaryBarColor, alpha=0.5, width=0.05) ## Draw zero reference line. ax_raw.add_artist(Line2D( (ax_raw.xaxis.get_view_interval()[0], ax_raw.xaxis.get_view_interval()[1]), (0,0), color='k', linewidth=1.25) ) if summaryLine is True: if paired is True: xdelta=0 else: xdelta=summaryLineWidth for i, m in enumerate(summaries): ax_raw.plot( (i-xdelta, i+xdelta), # x-coordinates (m, m), color=summaryColour, linestyle=summaryLineStyle) if showCI is True: sns.barplot( data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95) ax_raw.set_xlabel("") if floatContrast is False: fig.add_subplot(ax_raw) #### PLOT CONTRAST DATA. if len(current_tuple)==2: if paired is False: # Plot the CIs on the contrast axes. plotbootstrap(sw.collections[1], bslist=tempbs, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, offset=floatContrast, color=violinColor, linewidth=1) else: bootsDelta = bootstrap( plotdat[current_tuple[1]]-plotdat[current_tuple[0]], statfunction=statfunction, smoothboot=smoothboot, alpha_level=alpha_level, reps=reps) contrastList.append(bootsDelta) contrastListNames.append(current_tuple[1]+' vs. '+current_tuple[0]) summDelta = bootsDelta['summary'] lowDelta = bootsDelta['bca_ci_low'] highDelta = bootsDelta['bca_ci_high'] if floatContrast: xpos=0.375 else: xpos=0.25 # Plot the summary measure. ax_contrast.plot(xpos, bootsDelta['summary'], marker=summaryMarkerType, markerfacecolor='k', markersize=summaryMarkerSize, alpha=0.75 ) # Plot the CI. ax_contrast.plot([xpos, xpos], [lowDelta, highDelta], color='k', alpha=0.75, # linewidth=1, linestyle='solid' ) # Plot the violin-plot. 
v = ax_contrast.violinplot(bootsDelta['stat_array'], [xpos], widths = violinWidth, showextrema = False, showmeans = False) halfviolin(v, half = 'right', color = 'k') if floatContrast: # Set reference lines if paired is False: ## First get leftmost limit of left reference group xtemp, _=np.array(sw.collections[0].get_offsets()).T leftxlim=xtemp.min() ## Then get leftmost limit of right test group xtemp, _=np.array(sw.collections[1].get_offsets()).T rightxlim=xtemp.min() ref=tempbs['summary'] else: leftxlim=0 rightxlim=0.25 ref=bootsDelta['summary'] ax_contrast.set_xlim(-0.25, 0.5) # does this work? ## zero line ax_contrast.hlines(0, # y-coordinates leftxlim, 3.5, # x-coordinates, start and end. linestyle=contrastZeroLineStyle, linewidth=1, color=contrastZeroLineColor) ## effect size line ax_contrast.hlines(ref, rightxlim, 3.5, # x-coordinates, start and end. linestyle=contrastEffectSizeLineStyle, linewidth=1, color=contrastEffectSizeLineColor) if paired is False: es=float(tempbs['summary']) refSum=tempbs['statistic_ref'] else: es=float(bootsDelta['summary']) refSum=statfunction(plotdat[current_tuple[0]]) ## If the effect size is positive, shift the right axis up. if es>0: rightmin=ax_raw.get_ylim()[0]-es rightmax=ax_raw.get_ylim()[1]-es ## If the effect size is negative, shift the right axis down. elif es<0: rightmin=ax_raw.get_ylim()[0]+es rightmax=ax_raw.get_ylim()[1]+es ax_contrast.set_ylim(rightmin, rightmax) if gsIdx>0: ax_contrast.set_ylabel('') align_yaxis(ax_raw, refSum, ax_contrast, 0.) else: # Set bottom axes ybounds if contrastYlim is not None: ax_contrast.set_ylim(contrastYlim) if paired is False: # Set xlims so everything is properly visible! swarm_xbounds=ax_raw.get_xbound() ax_contrast.set_xbound(swarm_xbounds[0] -(summaryLineWidth * 1.1), swarm_xbounds[1] + (summaryLineWidth * 1.1)) else: ax_contrast.set_xlim(-0.05,0.25+violinWidth) else: # Plot the CIs on the bottom axes. 
plotbootstrap_hubspoke( bslist=bscontrast, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, linewidth=lineWidth) if floatContrast is False: fig.add_subplot(ax_contrast) if gsIdx>0: ax_raw.set_ylabel('') ax_contrast.set_ylabel('') # Turn contrastList into a pandas DataFrame, contrastList=pd.DataFrame(contrastList).T contrastList.columns=contrastListNames # Get number of axes in figure for aesthetic tweaks. axesCount=len(fig.get_axes()) for i in range(0, axesCount, 2): # Set new tick labels. # The tick labels belong to the SWARM axes # for both floating and non-floating plots. # This is because `sharex` was invoked. axx=fig.axes[i] newticklabs=list() for xticklab in axx.xaxis.get_ticklabels(): t=xticklab.get_text() if paired: N=str(counts) else: N=str(counts.ix[t]) if showGroupCount: newticklabs.append(t+' n='+N) else: newticklabs.append(t) axx.set_xticklabels( newticklabs, rotation=tickAngle, horizontalalignment=tickAlignment) ## Loop thru SWARM axes for aesthetic touchups. for i in range(0, axesCount, 2): axx=fig.axes[i] if floatContrast is False: axx.xaxis.set_visible(False) sns.despine(ax=axx, trim=True, bottom=False, left=False) else: sns.despine(ax=axx, trim=True, bottom=True, left=True) if i==0: drawback_y(axx) if i!=axesCount-2 and 'hue' in kwargs: # If this is not the final swarmplot, remove the hue legend. axx.legend().set_visible(False) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(False) else: # Draw back the lines for the relevant y-axes. # Not entirely sure why I have to do this. drawback_y(axx) else: drawback_y(axx) # Add zero reference line for swarmplots with bars. if summaryBar is True: axx.add_artist(Line2D( (axx.xaxis.get_view_interval()[0], axx.xaxis.get_view_interval()[1]), (0,0), color='black', linewidth=0.75 ) ) if legend is False: axx.legend().set_visible(False) else: if i==axesCount-2: # the last (rightmost) swarm axes. 
axx.legend(loc='top right', bbox_to_anchor=(1.1,1.0), fontsize=legendFontSize, **legendFontProps) ## Loop thru the CONTRAST axes and perform aesthetic touch-ups. ## Get the y-limits: for j,i in enumerate(range(1, axesCount, 2)): axx=fig.get_axes()[i] if floatContrast is False: xleft, xright=axx.xaxis.get_view_interval() # Draw zero reference line. axx.hlines(y=0, xmin=xleft-1, xmax=xright+1, linestyle=contrastZeroLineStyle, linewidth=0.75, color=contrastZeroLineColor) # reset view interval. axx.set_xlim(xleft, xright) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(False) else: # Draw back the lines for the relevant y-axes, only is axesCount is 2. # Not entirely sure why I have to do this. if axesCount==2: drawback_y(axx) sns.despine(ax=axx, top=True, right=True, left=False, bottom=False, trim=True) if j==0 and axesCount==2: # Draw back x-axis lines connecting ticks. drawback_x(axx) # Rotate tick labels. rotateTicks(axx,tickAngle,tickAlignment) elif floatContrast is True: if paired is True: # Get the bootstrapped contrast range. lower=np.min(contrastList.ix['stat_array',j]) upper=np.max(contrastList.ix['stat_array',j]) else: lower=np.min(contrastList.ix['diffarray',j]) upper=np.max(contrastList.ix['diffarray',j]) meandiff=contrastList.ix['summary', j] ## Make sure we have zero in the limits. if lower>0: lower=0. if upper<0: upper=0. ## Get the tick interval from the left y-axis. leftticks=fig.get_axes()[i-1].get_yticks() tickstep=leftticks[1] -leftticks[0] ## First re-draw of axis with new tick interval axx.yaxis.set_major_locator(MultipleLocator(base=tickstep)) newticks1=axx.get_yticks() ## Obtain major ticks that comfortably encompass lower and upper. newticks2=list() for a,b in enumerate(newticks1): if (b >= lower and b <= upper): # if the tick lies within upper and lower, take it. newticks2.append(b) # if the meandiff falls outside of the newticks2 set, add a tick in the right direction. 
if np.max(newticks2)<meandiff: ind=np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1. newticks2.append( newticks1[ind+1] ) elif meandiff<np.min(newticks2): ind=np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1. newticks2.append( newticks1[ind-1] ) newticks2=np.array(newticks2) newticks2.sort() ## Second re-draw of axis to shrink it to desired limits. axx.yaxis.set_major_locator(FixedLocator(locs=newticks2)) ## Despine the axes. sns.despine(ax=axx, trim=True, bottom=False, right=False, left=True, top=True) # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots. if (axesCount>2 and contrastShareY is True and floatContrast is False): # Set contrast ylim as max ticks of leftmost swarm axes. if contrastYlim is None: lower=list() upper=list() for c in range(0,len(contrastList.columns)): lower.append( np.min(contrastList.ix['bca_ci_low',c]) ) upper.append( np.max(contrastList.ix['bca_ci_high',c]) ) lower=np.min(lower) upper=np.max(upper) else: lower=contrastYlim[0] upper=contrastYlim[1] normalizeContrastY(fig, contrast_ylim = contrastYlim, show_all_yaxes = showAllYAxes) # Zero gaps between plots on the same row, if floatContrast is False if (floatContrast is False and showAllYAxes is False): gsMain.update(wspace=0.) else: # Tight Layout! gsMain.tight_layout(fig) # And we're all done. rcdefaults() # restore matplotlib defaults. sns.set() # restore seaborn defaults. return fig, contrastList
# Render whichever figure is currently pending.
plt.show()

# Scatterplot matrix: pairwise views of the selected columns, with the
# points coloured by the 'category' column.
pairplot_columns = [
    'alcohol',
    'color_intensity',
    'malic_acid',
    'magnesium',
    'category',
]
fig = sns.pairplot(data=data[pairplot_columns], hue='category')
plt.show()

# ----------------------------------------------------------------------
# Bee swarm plot of 'total_phenols', one swarm per 'category' value.
sns.swarmplot(data=data, x='category', y='total_phenols')
plt.show()

# ----------------------------------------------------------------------
# Empirical cumulative distribution function (ECDF) of the 'hue' column.
# x holds the sorted observations; y holds each observation's 1-based
# rank divided by the number of observations.
x = np.sort(data['hue'])
n_obs = x.shape[0]
y = np.arange(1, n_obs + 1, dtype='float32') / n_obs

# Markers only (no connecting line).
plt.plot(x, y, marker='o', linestyle='')
plt.ylabel('ECDF')
def contrastplot(data, x=None, y=None, idx=None, idcol=None, alpha=0.75, axis_title_size=None, ci=95, contrastShareY=True, contrastEffectSizeLineStyle='solid', contrastEffectSizeLineColor='black', contrastYlim=None, contrastZeroLineStyle='solid', contrastZeroLineColor='black', connectPairs=True, effectSizeYLabel="Effect Size", figsize=None, floatContrast=True, floatSwarmSpacer=0.2, heightRatio=(1, 1), lineWidth=2, legend=True, legendFontSize=14, legendFontProps={}, paired=False, pairedDeltaLineAlpha=0.3, pairedDeltaLineWidth=1.2, pal=None, rawMarkerSize=8, rawMarkerType='o', reps=3000, showGroupCount=True, showCI=False, showAllYAxes=False, showRawData=True, smoothboot=False, statfunction=None, summaryBar=False, summaryBarColor='grey', summaryBarAlpha=0.25, summaryColour='black', summaryLine=True, summaryLineStyle='solid', summaryLineWidth=0.25, summaryMarkerSize=10, summaryMarkerType='o', swarmShareY=True, swarmYlim=None, tickAngle=45, tickAlignment='right', violinOffset=0.375, violinWidth=0.2, violinColor='k', xticksize=None, yticksize=None, **kwargs): '''Takes a pandas DataFrame and produces a contrast plot: either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot. Paired and unpaired options available. Keyword arguments: data: pandas DataFrame x: string column name containing categories to be plotted on the x-axis. y: string column name containing values to be plotted on the y-axis. idx: tuple flxible declaration of groupwise comparisons. idcol: string for paired plots. alpha: float alpha (transparency) of raw swarmed data points. 
axis_title_size=None ci=95 contrastShareY=True contrastEffectSizeLineStyle='solid' contrastEffectSizeLineColor='black' contrastYlim=None contrastZeroLineStyle='solid' contrastZeroLineColor='black' effectSizeYLabel="Effect Size" figsize=None floatContrast=True floatSwarmSpacer=0.2 heightRatio=(1,1) lineWidth=2 legend=True legendFontSize=14 legendFontProps={} paired=False pairedDeltaLineAlpha=0.3 pairedDeltaLineWidth=1.2 pal=None rawMarkerSize=8 rawMarkerType='o' reps=3000 showGroupCount=True showCI=False showAllYAxes=False showRawData=True smoothboot=False statfunction=None summaryBar=False summaryBarColor='grey' summaryBarAlpha=0.25 summaryColour='black' summaryLine=True summaryLineStyle='solid' summaryLineWidth=0.25 summaryMarkerSize=10 summaryMarkerType='o' swarmShareY=True swarmYlim=None tickAngle=45 tickAlignment='right' violinOffset=0.375 violinWidth=0.2 violinColor='k' xticksize=None yticksize=None Returns: An matplotlib Figure. Organization of figure Axes. ''' # Check that `data` is a pandas dataframe if 'DataFrame' not in str(type(data)): raise TypeError( "The object passed to the command is not not a pandas DataFrame.\ Please convert it to a pandas DataFrame.") # make sure that at least x, y, and idx are specified. if x is None and y is None and idx is None: raise ValueError( 'You need to specify `x` and `y`, or `idx`. Neither has been specifed.' ) if x is None: # if x is not specified, assume this is a 'wide' dataset, with each idx being the name of a column. datatype = 'wide' # Check that the idx are legit columns. all_idx = np.unique([element for tupl in idx for element in tupl]) # # melt the data. # data=pd.melt(data,value_vars=all_idx) # x='variable' # y='value' else: # if x is specified, assume this is a 'long' dataset with each row corresponding to one datapoint. datatype = 'long' # make sure y is not none. if y is None: raise ValueError("`paired` is false, but no y-column given.") # Calculate Ns. 
counts = data.groupby(x)[y].count() # Get and set levels of data[x] if paired is True: violinWidth = 0.1 # # Calculate Ns--which should be simply the number of rows in data. # counts=len(data) # is idcol supplied? if idcol is None and datatype == 'long': raise ValueError( '`idcol` has not been supplied but a paired plot is desired; please specify the `idcol`.' ) if idx is not None: # check if multi-plot or not if all(isinstance(element, str) for element in idx): # check that every idx is a column name. idx_not_in_cols = [n for n in idx if n not in data[x].unique()] if len(idx_not_in_cols) != 0: raise ValueError( str(idx_not_in_cols) + " cannot be found in the columns of `data`.") # data_wide_cols=[n for n in idx if n in data.columns] # if idx is supplied but not a multiplot (ie single list or tuple) if len(idx) != 2: raise ValueError(idx + " does not have length 2.") else: tuple_in = (tuple(idx, ), ) widthratio = [1] elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! idx_not_in_cols = [ n for tup in idx for n in tup if n not in data[x].unique() ] if len(idx_not_in_cols) != 0: raise ValueError( str(idx_not_in_cols) + " cannot be found in the column " + x) # data_wide_cols=[n for tup in idx for n in tup if n in data.columns] if (any(len(element) != 2 for element in idx)): # If any of the tuples does not contain exactly 2 elements. raise ValueError(element + " does not have length 2.") # Make sure the widthratio of the seperate multiplot corresponds to how # many groups there are in each one. tuple_in = idx widthratio = [] for i in tuple_in: widthratio.append(len(i)) elif idx is None: raise ValueError('Please specify idx.') showRawData = False # Just show lines, do not show data. showCI = False # wait till I figure out how to plot this for sns.barplot. if datatype == 'long': if idx is None: ## If `idx` is not specified, just take the FIRST TWO levels alphabetically. 
tuple_in = tuple(np.sort(np.unique(data[x]))[0:2], ) # pivot the dataframe if it is long! data_pivot = data.pivot_table(index=idcol, columns=x, values=y) elif paired is False: if idx is None: widthratio = [1] tuple_in = (tuple(data[x].unique()), ) if len(tuple_in[0]) > 2: floatContrast = False else: if all(isinstance(element, str) for element in idx): # if idx is supplied but not a multiplot (ie single list or tuple) # check all every idx specified can be found in data[x] idx_not_in_x = [n for n in idx if n not in data[x].unique()] if len(idx_not_in_x) != 0: raise ValueError( str(idx_not_in_x) + " cannot be found in the column " + x) tuple_in = (idx, ) widthratio = [1] if len(idx) > 2: floatContrast = False elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! idx_not_in_x = [ n for tup in idx for n in tup if n not in data[x].unique() ] if len(idx_not_in_x) != 0: raise ValueError( str(idx_not_in_x) + " cannot be found in the column " + x) tuple_in = idx if (any(len(element) > 2 for element in tuple_in)): # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False. floatContrast = False # Make sure the widthratio of the seperate multiplot corresponds to how # many groups there are in each one. widthratio = [] for i in tuple_in: widthratio.append(len(i)) else: raise TypeError( "The object passed to `idx` consists of a mixture of single strings and tuples. \ Please make sure that `idx` is either a tuple of column names, or a tuple of tuples, for plotting." ) # Ensure summaryLine and summaryBar are not displayed together. if summaryLine is True and summaryBar is True: summaryBar = True summaryLine = False # Turn off summary line if floatContrast is true if floatContrast: summaryLine = False # initialise statfunction if statfunction == None: statfunction = np.mean # Create list to collect all the contrast DataFrames generated. 
contrastList = list() contrastListNames = list() # Setting color palette for plotting. if pal is None: if 'hue' in kwargs: colorCol = kwargs['hue'] if colorCol not in data.columns: raise ValueError(colorCol + ' is not a column name.') colGrps = data[colorCol].unique() #.tolist() plotPal = dict( zip(colGrps, sns.color_palette(n_colors=len(colGrps)))) else: if datatype == 'long': colGrps = data[x].unique() #.tolist() plotPal = dict( zip(colGrps, sns.color_palette(n_colors=len(colGrps)))) if datatype == 'wide': plotPal = np.repeat('k', len(data)) else: if datatype == 'long': plotPal = pal if datatype == 'wide': plotPal = list(map(lambda x: pal[x], data[hue])) if swarmYlim is None: # get range of _selected groups_. # u = list() # for t in tuple_in: # for i in np.unique(t): # u.append(i) # u = np.unique(u) u = np.unique([element for tupl in tuple_in for element in tupl]) if datatype == 'long': tempdat = data[data[x].isin(u)] swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])]) if datatype == 'wide': allMin = list() allMax = list() for col in u: allMin.append(np.min(data[col])) allMax.append(np.max(data[col])) swarm_ylim = np.array([np.min(allMin), np.max(allMax)]) swarm_ylim = np.round(swarm_ylim) else: swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]]) if summaryBar is True: lims = swarm_ylim # check that 0 lies within the desired limits. # if not, extend (upper or lower) limit to zero. if 0 not in range(int(round(lims[0])), int(round( lims[1]))): # turn swarm_ylim to integer range. # check if all negative:. if lims[0] < 0. and lims[1] < 0.: swarm_ylim = np.array([np.min(lims), 0.]) # check if all positive. elif lims[0] > 0. 
and lims[1] > 0.: swarm_ylim = np.array([0., np.max(lims)]) if contrastYlim is not None: contrastYlim = np.array([contrastYlim[0], contrastYlim[1]]) # plot params if axis_title_size is None: axis_title_size = 27 if yticksize is None: yticksize = 22 if xticksize is None: xticksize = 22 # Set clean style sns.set(style='ticks') axisTitleParams = {'labelsize': axis_title_size} xtickParams = {'labelsize': xticksize} ytickParams = {'labelsize': yticksize} svgParams = {'fonttype': 'none'} rc('axes', **axisTitleParams) rc('xtick', **xtickParams) rc('ytick', **ytickParams) rc('svg', **svgParams) if figsize is None: if len(tuple_in) > 2: figsize = (12, (12 / np.sqrt(2))) else: figsize = (8, (8 / np.sqrt(2))) # calculate CI. if ci < 0 or ci > 100: raise ValueError('ci should be between 0 and 100.') alpha_level = (100. - ci) / 100. # Initialise figure, taking into account desired figsize. fig = plt.figure(figsize=figsize) # Initialise GridSpec based on `tuple_in` shape. gsMain = gridspec.GridSpec( 1, np.shape(tuple_in)[0], # 1 row; columns based on number of tuples in tuple. width_ratios=widthratio, wspace=0) for gsIdx, current_tuple in enumerate(tuple_in): #### FOR EACH TUPLE IN IDX if datatype == 'long': plotdat = data[data[x].isin(current_tuple)] plotdat[x] = plotdat[x].astype("category") plotdat[x].cat.set_categories(current_tuple, ordered=True, inplace=True) plotdat.sort_values(by=[x]) # # Drop all nans. # plotdat.dropna(inplace=True) summaries = plotdat.groupby(x)[y].apply(statfunction) if datatype == 'wide': plotdat = data[list(current_tuple)] summaries = statfunction(plotdat) plotdat = pd.melt(plotdat) ##### NOW I HAVE MELTED THE WIDE DATA. if floatContrast is True: # Use fig.add_subplot instead of plt.Subplot. ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False) ax_contrast = ax_raw.twinx() else: # Create subGridSpec with 2 rows and 1 column. 
subGridSpec = gridspec.GridSpecFromSubplotSpec( 2, 1, subplot_spec=gsMain[gsIdx], wspace=0) # Use plt.Subplot instead of fig.add_subplot ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False) ax_contrast = plt.Subplot(fig, subGridSpec[1, 0], sharex=ax_raw, frame_on=False) # Calculate the boostrapped contrast bscontrast = list() if paired is False: tempplotdat = plotdat[[ x, y ]] # only select the columns used for x and y plotting. for i in range(1, len(current_tuple)): # Note that you start from one. No need to do auto-contrast! # if datatype=='long':aas tempbs = bootstrap_contrast( data=tempplotdat.dropna(), x=x, y=y, idx=[current_tuple[0], current_tuple[i]], statfunction=statfunction, smoothboot=smoothboot, alpha_level=alpha_level, reps=reps) bscontrast.append(tempbs) contrastList.append(tempbs) contrastListNames.append(current_tuple[i] + ' vs. ' + current_tuple[0]) #### PLOT RAW DATA. ax_raw.set_ylim(swarm_ylim) # ax_raw.yaxis.set_major_locator(MaxNLocator(n_bins='auto')) # ax_raw.yaxis.set_major_locator(LinearLocator()) if paired is False and showRawData is True: # Seaborn swarmplot doc says to set custom ylims first. sw = sns.swarmplot(data=plotdat, x=x, y=y, order=current_tuple, ax=ax_raw, alpha=alpha, palette=plotPal, size=rawMarkerSize, marker=rawMarkerType, **kwargs) if floatContrast: # Get horizontal offset values. maxXBefore = max(sw.collections[0].get_offsets().T[0]) minXAfter = min(sw.collections[1].get_offsets().T[0]) xposAfter = maxXBefore + floatSwarmSpacer xAfterShift = minXAfter - xposAfter # shift the (second) swarmplot offsetSwarmX(sw.collections[1], -xAfterShift) # shift the tick. ax_raw.set_xticks([0., 1 - xAfterShift]) elif paired is True: if showRawData is True: sw = sns.swarmplot(data=plotdat, x=x, y=y, order=current_tuple, ax=ax_raw, alpha=alpha, palette=plotPal, size=rawMarkerSize, marker=rawMarkerType, **kwargs) if connectPairs is True: # Produce paired plot with lines. 
before = plotdat[plotdat[x] == current_tuple[0]][y].tolist() after = plotdat[plotdat[x] == current_tuple[1]][y].tolist() linedf = pd.DataFrame({'before': before, 'after': after}) # to get color, need to loop thru each line and plot individually. for ii in range(0, len(linedf)): ax_raw.plot( [0, 0.25], [linedf.loc[ii, 'before'], linedf.loc[ii, 'after']], linestyle='solid', linewidth=pairedDeltaLineWidth, color=plotPal[current_tuple[0]], alpha=pairedDeltaLineAlpha, ) ax_raw.set_xlim(-0.25, 0.5) ax_raw.set_xticks([0, 0.25]) ax_raw.set_xticklabels([current_tuple[0], current_tuple[1]]) # if swarmYlim is None: # # if swarmYlim was not specified, tweak the y-axis # # to show all the data without losing ticks and range. # ## Get all yticks. # axxYTicks=ax_raw.yaxis.get_majorticklocs() # ## Get ytick interval. # YTickInterval=axxYTicks[1]-axxYTicks[0] # ## Get current ylim # currentYlim=ax_raw.get_ylim() # ## Extend ylim by adding a fifth of the tick interval as spacing at both ends. # ax_raw.set_ylim( # currentYlim[0]-(YTickInterval/5), # currentYlim[1]+(YTickInterval/5) # ) # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto')) # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto')) # ax_raw.yaxis.set_major_locator(LinearLocator()) if summaryBar is True: if paired is False: bar_raw = sns.barplot(x=summaries.index.tolist(), y=summaries.values, facecolor=summaryBarColor, ax=ax_raw, alpha=summaryBarAlpha) if floatContrast is True: maxSwarmSpan = 2 / 10. xlocs = list() for i, bar in enumerate(bar_raw.patches): x_width = bar.get_x() width = bar.get_width() centre = x_width + (width / 2.) if i == 0: bar.set_x(centre - maxSwarmSpan / 2.) xlocs.append(centre) else: bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.) xlocs.append(centre - xAfterShift) bar.set_width(maxSwarmSpan) ax_raw.set_xticks( xlocs) # make sure xticklocs match the barplot. elif floatContrast is False: maxSwarmSpan = 4 / 10. 
xpos = ax_raw.xaxis.get_majorticklocs() for i, bar in enumerate(bar_raw.patches): bar.set_x(xpos[i] - maxSwarmSpan / 2.) bar.set_width(maxSwarmSpan) else: # if paired is true ax_raw.bar([0, 0.25], [ statfunction(plotdat[current_tuple[0]]), statfunction(plotdat[current_tuple[1]]) ], color=summaryBarColor, alpha=0.5, width=0.05) ## Draw zero reference line. ax_raw.add_artist( Line2D((ax_raw.xaxis.get_view_interval()[0], ax_raw.xaxis.get_view_interval()[1]), (0, 0), color='k', linewidth=1.25)) if summaryLine is True: if paired is True: xdelta = 0 else: xdelta = summaryLineWidth for i, m in enumerate(summaries): ax_raw.plot( (i - xdelta, i + xdelta), # x-coordinates (m, m), color=summaryColour, linestyle=summaryLineStyle) if showCI is True: sns.barplot(data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95) ax_raw.set_xlabel("") if floatContrast is False: fig.add_subplot(ax_raw) #### PLOT CONTRAST DATA. if len(current_tuple) == 2: if paired is False: # Plot the CIs on the contrast axes. plotbootstrap(sw.collections[1], bslist=tempbs, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, offset=floatContrast, color=violinColor, linewidth=1) else: bootsDelta = bootstrap(plotdat[current_tuple[1]] - plotdat[current_tuple[0]], statfunction=statfunction, smoothboot=smoothboot, alpha_level=alpha_level, reps=reps) contrastList.append(bootsDelta) contrastListNames.append(current_tuple[1] + ' vs. ' + current_tuple[0]) summDelta = bootsDelta['summary'] lowDelta = bootsDelta['bca_ci_low'] highDelta = bootsDelta['bca_ci_high'] if floatContrast: xpos = 0.375 else: xpos = 0.25 # Plot the summary measure. ax_contrast.plot(xpos, bootsDelta['summary'], marker=summaryMarkerType, markerfacecolor='k', markersize=summaryMarkerSize, alpha=0.75) # Plot the CI. ax_contrast.plot( [xpos, xpos], [lowDelta, highDelta], color='k', alpha=0.75, # linewidth=1, linestyle='solid') # Plot the violin-plot. 
v = ax_contrast.violinplot(bootsDelta['stat_array'], [xpos], widths=violinWidth, showextrema=False, showmeans=False) halfviolin(v, half='right', color='k') if floatContrast: # Set reference lines if paired is False: ## First get leftmost limit of left reference group xtemp, _ = np.array(sw.collections[0].get_offsets()).T leftxlim = xtemp.min() ## Then get leftmost limit of right test group xtemp, _ = np.array(sw.collections[1].get_offsets()).T rightxlim = xtemp.min() ref = tempbs['summary'] else: leftxlim = 0 rightxlim = 0.25 ref = bootsDelta['summary'] ax_contrast.set_xlim(-0.25, 0.5) # does this work? ## zero line ax_contrast.hlines( 0, # y-coordinates leftxlim, 3.5, # x-coordinates, start and end. linestyle=contrastZeroLineStyle, linewidth=1, color=contrastZeroLineColor) ## effect size line ax_contrast.hlines( ref, rightxlim, 3.5, # x-coordinates, start and end. linestyle=contrastEffectSizeLineStyle, linewidth=1, color=contrastEffectSizeLineColor) if paired is False: es = float(tempbs['summary']) refSum = tempbs['statistic_ref'] else: es = float(bootsDelta['summary']) refSum = statfunction(plotdat[current_tuple[0]]) ## If the effect size is positive, shift the right axis up. if es > 0: rightmin = ax_raw.get_ylim()[0] - es rightmax = ax_raw.get_ylim()[1] - es ## If the effect size is negative, shift the right axis down. elif es < 0: rightmin = ax_raw.get_ylim()[0] + es rightmax = ax_raw.get_ylim()[1] + es ax_contrast.set_ylim(rightmin, rightmax) if gsIdx > 0: ax_contrast.set_ylabel('') align_yaxis(ax_raw, refSum, ax_contrast, 0.) else: # Set bottom axes ybounds if contrastYlim is not None: ax_contrast.set_ylim(contrastYlim) if paired is False: # Set xlims so everything is properly visible! swarm_xbounds = ax_raw.get_xbound() ax_contrast.set_xbound( swarm_xbounds[0] - (summaryLineWidth * 1.1), swarm_xbounds[1] + (summaryLineWidth * 1.1)) else: ax_contrast.set_xlim(-0.05, 0.25 + violinWidth) else: # Plot the CIs on the bottom axes. 
plotbootstrap_hubspoke(bslist=bscontrast, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, linewidth=lineWidth) if floatContrast is False: fig.add_subplot(ax_contrast) if gsIdx > 0: ax_raw.set_ylabel('') ax_contrast.set_ylabel('') # Turn contrastList into a pandas DataFrame, contrastList = pd.DataFrame(contrastList).T contrastList.columns = contrastListNames # Get number of axes in figure for aesthetic tweaks. axesCount = len(fig.get_axes()) for i in range(0, axesCount, 2): # Set new tick labels. # The tick labels belong to the SWARM axes # for both floating and non-floating plots. # This is because `sharex` was invoked. axx = fig.axes[i] newticklabs = list() for xticklab in axx.xaxis.get_ticklabels(): t = xticklab.get_text() if paired: N = str(counts) else: N = str(counts.ix[t]) if showGroupCount: newticklabs.append(t + ' n=' + N) else: newticklabs.append(t) axx.set_xticklabels(newticklabs, rotation=tickAngle, horizontalalignment=tickAlignment) ## Loop thru SWARM axes for aesthetic touchups. for i in range(0, axesCount, 2): axx = fig.axes[i] if floatContrast is False: axx.xaxis.set_visible(False) sns.despine(ax=axx, trim=True, bottom=False, left=False) else: sns.despine(ax=axx, trim=True, bottom=True, left=True) if i == 0: drawback_y(axx) if i != axesCount - 2 and 'hue' in kwargs: # If this is not the final swarmplot, remove the hue legend. axx.legend().set_visible(False) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(False) else: # Draw back the lines for the relevant y-axes. # Not entirely sure why I have to do this. drawback_y(axx) else: drawback_y(axx) # Add zero reference line for swarmplots with bars. 
if summaryBar is True: axx.add_artist( Line2D((axx.xaxis.get_view_interval()[0], axx.xaxis.get_view_interval()[1]), (0, 0), color='black', linewidth=0.75)) if legend is False: axx.legend().set_visible(False) else: if i == axesCount - 2: # the last (rightmost) swarm axes. axx.legend(loc='top right', bbox_to_anchor=(1.1, 1.0), fontsize=legendFontSize, **legendFontProps) ## Loop thru the CONTRAST axes and perform aesthetic touch-ups. ## Get the y-limits: for j, i in enumerate(range(1, axesCount, 2)): axx = fig.get_axes()[i] if floatContrast is False: xleft, xright = axx.xaxis.get_view_interval() # Draw zero reference line. axx.hlines(y=0, xmin=xleft - 1, xmax=xright + 1, linestyle=contrastZeroLineStyle, linewidth=0.75, color=contrastZeroLineColor) # reset view interval. axx.set_xlim(xleft, xright) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(False) else: # Draw back the lines for the relevant y-axes, only is axesCount is 2. # Not entirely sure why I have to do this. if axesCount == 2: drawback_y(axx) sns.despine(ax=axx, top=True, right=True, left=False, bottom=False, trim=True) if j == 0 and axesCount == 2: # Draw back x-axis lines connecting ticks. drawback_x(axx) # Rotate tick labels. rotateTicks(axx, tickAngle, tickAlignment) elif floatContrast is True: if paired is True: # Get the bootstrapped contrast range. lower = np.min(contrastList.ix['stat_array', j]) upper = np.max(contrastList.ix['stat_array', j]) else: lower = np.min(contrastList.ix['diffarray', j]) upper = np.max(contrastList.ix['diffarray', j]) meandiff = contrastList.ix['summary', j] ## Make sure we have zero in the limits. if lower > 0: lower = 0. if upper < 0: upper = 0. ## Get the tick interval from the left y-axis. 
leftticks = fig.get_axes()[i - 1].get_yticks() tickstep = leftticks[1] - leftticks[0] ## First re-draw of axis with new tick interval axx.yaxis.set_major_locator(MultipleLocator(base=tickstep)) newticks1 = axx.get_yticks() ## Obtain major ticks that comfortably encompass lower and upper. newticks2 = list() for a, b in enumerate(newticks1): if (b >= lower and b <= upper): # if the tick lies within upper and lower, take it. newticks2.append(b) # if the meandiff falls outside of the newticks2 set, add a tick in the right direction. if np.max(newticks2) < meandiff: ind = np.where(newticks1 == np.max(newticks2))[0][ 0] # find out the max tick index in newticks1. newticks2.append(newticks1[ind + 1]) elif meandiff < np.min(newticks2): ind = np.where(newticks1 == np.min(newticks2))[0][ 0] # find out the min tick index in newticks1. newticks2.append(newticks1[ind - 1]) newticks2 = np.array(newticks2) newticks2.sort() ## Second re-draw of axis to shrink it to desired limits. axx.yaxis.set_major_locator(FixedLocator(locs=newticks2)) ## Despine the axes. sns.despine(ax=axx, trim=True, bottom=False, right=False, left=True, top=True) # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots. if (axesCount > 2 and contrastShareY is True and floatContrast is False): # Set contrast ylim as max ticks of leftmost swarm axes. if contrastYlim is None: lower = list() upper = list() for c in range(0, len(contrastList.columns)): lower.append(np.min(contrastList.ix['bca_ci_low', c])) upper.append(np.max(contrastList.ix['bca_ci_high', c])) lower = np.min(lower) upper = np.max(upper) else: lower = contrastYlim[0] upper = contrastYlim[1] normalizeContrastY(fig, contrast_ylim=contrastYlim, show_all_yaxes=showAllYAxes) # Zero gaps between plots on the same row, if floatContrast is False if (floatContrast is False and showAllYAxes is False): gsMain.update(wspace=0.) else: # Tight Layout! gsMain.tight_layout(fig) # And we're all done. 
rcdefaults() # restore matplotlib defaults. sns.set() # restore seaborn defaults. return fig, contrastList
def swarmsummary(data, x, y, idx=None, statfunction=None,
                 violinOffset=0.1, violinWidth=0.2, figsize=(7, 7),
                 legend=True, smoothboot=False, rawMarkerSize=10,
                 summaryMarkerSize=12, rawMarkerType='o',
                 summaryMarkerType='o', **kwargs):
    """Draw a swarmplot of `y` per level of `x` with a bootstrap summary each.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw data. It is read but not re-ordered or modified here.
    x, y : string
        Column names: `x` holds the group levels, `y` the plotted values.
    idx : sequence, optional
        Levels of `x` to plot, in plotting order. When None, the levels are
        taken from `data[x].unique()` in order of first appearance.
    statfunction : callable, optional
        Summary statistic handed to `bootstrap`. Defaults to `np.mean`.
    violinOffset, violinWidth, summaryMarkerSize, summaryMarkerType :
        Forwarded unchanged to `plotbootstrap`.
    figsize : tuple, optional
        Forwarded to `plt.subplots`.
    legend : bool, optional
        True places a legend outside the axes; False hides it. Any other
        value leaves the legend untouched.
    smoothboot : bool, optional
        Forwarded unchanged to `bootstrap`.
    rawMarkerSize, rawMarkerType :
        Marker size and marker style of the swarmplot points.
    **kwargs
        Forwarded to `sns.swarmplot`.

    Returns
    -------
    fig : the created matplotlib figure.
    pandas.DataFrame built with `DataFrame.from_dict` from the per-level
    results of `bootstrap`, keyed by level.
    """
    df = data  # so we don't re-order the rawdata!

    # initialise statfunction
    if statfunction is None:
        statfunction = np.mean

    # DO NOT USE the numpy.unique() method here:
    # it will not preserve the order of appearance of the levels.
    levs = df[x].unique() if idx is None else idx

    # calculate bootstrap list, one entry per level.
    bslist = OrderedDict()
    for lev in levs:
        level_values = df.loc[df[x] == lev][y]
        bslist[lev] = bootstrap(level_values,
                                statfunction=statfunction,
                                smoothboot=smoothboot)

    # Initialise figure
    fig, ax = plt.subplots(figsize=figsize)
    sw = sns.swarmplot(data=df, x=x, y=y, order=levs,
                       size=rawMarkerSize, marker=rawMarkerType, **kwargs)

    # Overlay each group's bootstrap summary and collect the plotted
    # y-offsets. A dedicated local name is used so that the `y` column-name
    # parameter is not overwritten inside the loop.
    y_offsets = list()
    for i, group_bootstrap in enumerate(bslist.values()):
        plotbootstrap(sw.collections[i],
                      bslist=group_bootstrap,
                      ax=ax,
                      violinWidth=violinWidth,
                      violinOffset=violinOffset,
                      marker=summaryMarkerType,
                      markersize=summaryMarkerSize,
                      color='k',
                      linewidth=2)
        _, group_y = np.array(sw.collections[i].get_offsets()).T
        y_offsets.append(group_y)
    y_offsets = np.concatenate(y_offsets)

    # Pad both limits by 10% of their magnitude, always away from the data.
    # For positive extremes this equals the former 0.9*min / 1.1*max; for
    # negative extremes the former formula moved the limit *into* the data
    # and clipped points.
    y_low = y_offsets.min()
    y_high = y_offsets.max()
    ax.set_ylim(y_low - 0.1 * abs(y_low), y_high + 0.1 * abs(y_high))

    if legend is True:
        ax.legend(loc='center left', bbox_to_anchor=(1.1, 1))
    elif legend is False:
        ax.legend().set_visible(False)

    sns.despine(ax=ax, trim=True)

    return fig, pd.DataFrame.from_dict(bslist)
def expandable_ttest(
    df,
    colorset=QUALITATIVE_COLORSET,
    compare="Treatment",
    comparisons={"Period [days]": []},
    datacolumn_label="Sucrose Preference Ratio",
    legend_loc="best",
    rename_treatments={},
    bp_style=True,
    save_as=False,
):
    """High-level interface for plotting of one or multiple related t-tests.

    Parameters
    ----------

    df : {pandas.Dataframe, string}
        Pandas Dataframe containing the experimental data, or path pointing
        to a csv containing such data. When a DataFrame is passed and no
        comparison subset is requested, `rename_treatments` is applied to
        that DataFrame in place.
    colorset : optional
        Colors handed to `sns.color_palette()` to build the swarmplot palette.
    compare : string, optional
        Which parameter to categorize the comparison by. Must be a column
        name from df.
    comparisons : dict, optional
        A dictionary, the key of which indicates which df column to generate
        comparison instances from. If only a subset of the available rows are
        to be included in the comparison, the dictionary needs to specify a
        value, consisting of a list of acceptable values on the column given
        by the key. Only the first key/value pair is used.
    datacolumn_label : string, optional
        A column name from df, the values in which column give the data to
        plot.
    legend_loc : string, optional
        Where to place the legend on the figure.
    rename_treatments : dict, optional
        Dictionary with strings as keys and values used to map treatment
        names (values of the "Treatment" column) onto new strings.
    bp_style : bool, optional
        Whether to apply the default behaviopy style.
    save_as : string, optional
        If truthy, path (`~` is expanded) the figure is saved to.

    Notes
    -----

    Seaborn's `sns.swarmplot()` does not read rcParams by itself, so we need
    to pass it `size=rcParams['lines.markersize']` to correctly set the
    marker size.
    """
    # NOTE: the dict defaults in the signature are never mutated below, so
    # sharing them across calls is harmless; they are kept for compatibility.

    # `basestring` only exists on Python 2; fall back to `str` on Python 3.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str
    if isinstance(df, string_types):
        df = pd.read_csv(path.abspath(path.expanduser(df)))

    comparison_instances_label = list(comparisons.keys())[0]
    comparison_instances = list(comparisons.values())[0]
    if comparison_instances:
        # Keep only the requested comparison instances. The selection used to
        # be computed and thrown away (and wrapped the list in another list),
        # so the subset was never applied. Copy so the renaming below writes
        # to an independent frame rather than a slice of the caller's data.
        keep = df[comparison_instances_label].isin(comparison_instances)
        df = df[keep].copy()

    if rename_treatments:
        for key in rename_treatments:
            df.loc[df["Treatment"] == key, "Treatment"] = rename_treatments[key]
        df = control_first_reordering(df, "Treatment")

    if bp_style:
        sns.set_style("white", {'legend.frameon': True})
        plt.style.use(u'seaborn-darkgrid')
        plt.style.use(u'ggplot')

    sns.swarmplot(
        x=comparison_instances_label,
        y=datacolumn_label,
        hue=compare,
        data=df,
        palette=sns.color_palette(colorset),
        split=True,
        size=rcParams['lines.markersize'],
    )
    plt.legend(loc=legend_loc, frameon=True)

    add_significance(df, datacolumn_label, compare=compare,
                     over=comparison_instances_label)

    if save_as:
        plt.savefig(path.abspath(path.expanduser(save_as)), bbox_inches='tight')
def do_movement():
    """Render the movement figures: box plots per experiment and per-run plots.

    Seven figures are produced, in this order: "movement", "movement_x",
    "movement_y", "movement_back", "movement_runs", "movement_x_runs" and
    "movement_y_runs". All data comes from the module-level `analyses` frame.
    """
    # (value columns, panel titles) for the three three-panel layouts.
    overall = (["path_length", "move_x", "move_y"],
               ["Path length", "Movement in $x$", "Movement in $y$"])
    sideways = (["move_l", "move_r", "move_x"],
                ["Movement left", "Movement right", "Movement in $x$"])
    lengthways = (["move_b", "move_f", "move_y"],
                  ["Movement backwards", "Movement forwards",
                   "Movement in $y$"])

    def long_form(columns):
        # One row per (user, experiment, order, group, variable) measurement.
        return pd.melt(analyses,
                       id_vars=["user", "experiment", "order", "group"],
                       value_vars=columns)

    def decorate(grid, titles, xlabel=None):
        # Titles first, then the shared y label, then optional x labels,
        # finally anchor the current axes' y range at zero.
        for position, title in enumerate(titles):
            grid.fig.axes[position].set_title(title)
        grid.fig.axes[0].set_ylabel("distance (m)")
        if xlabel is not None:
            for position in range(len(titles)):
                grid.fig.axes[position].set_xlabel(xlabel)
        plt.ylim(0, plt.ylim()[1])

    def box_panels(columns, titles):
        # One box-plot panel per variable, grouped by experiment.
        grid = sns.factorplot(x="experiment", y="value", col="variable",
                              data=long_form(columns), kind="box")
        decorate(grid, titles)

    def run_panels(columns, titles, **extra):
        # One panel per variable over run order, coloured by experiment.
        grid = sns.factorplot(x="order", y="value", col="variable",
                              data=long_form(columns), hue="experiment",
                              **extra)
        decorate(grid, titles, xlabel="run")

    with figure("movement", figsize=fig_size(0.9, 0.4)):
        box_panels(*overall)

    with figure("movement_x"):
        box_panels(*sideways)

    with figure("movement_y"):
        box_panels(*lengthways)

    with figure("movement_back"):
        sns.factorplot(x="experiment", y="move_b", data=analyses, kind="box")
        sns.swarmplot(x="experiment", y="move_b", split=True, data=analyses,
                      palette=cmap_complement)
        plt.ylabel("distance (m)")
        plt.title("Movement backwards")

    with figure("movement_runs", figsize=fig_size(0.9, 0.4)):
        run_panels(*overall, capsize=0.2)

    with figure("movement_x_runs"):
        run_panels(*sideways)

    with figure("movement_y_runs"):
        run_panels(*lengthways)
# Fold the difference arrays onto one half: the first columns are mirrored
# left-to-right and averaged with the last columns (5 columns for the raw
# data, 100 for the fits). The last 100 entries of fits_x are doubled.
# NOTE(review): presumably the columns are symmetric about the centre of the
# tested range -- confirm against where raw_diff / fits_diff are built.
fits_x_half = 2 * fits_x[-100:]
raw_half = 0.5 * (np.fliplr(raw_diff[:, :5]) + raw_diff[:, -5:])
fits_half = 0.5 * (np.fliplr(fits_diff[:, :100]) + fits_diff[:, -100:])

# %% Plot difference in two conditions
rcParams['font.sans-serif'] = "Arial"
c3 = '#332288'  # single colour used for every marker in this figure
ms = 5  # marker size of the hollow mean markers
# NOTE(review): 8.7 / 2.54 looks like a width of 8.7 cm converted to inches.
plt.figure(figsize=(8.7 / 2.54, 2.5))

# Build a long-form table with one row per plotted value.
# The first part repeats the five tested angles 20 times, matching the
# row-major order of raw_half.ravel() -- assumes raw_half has 20 rows.
angles = 20 * [1.25, 2.5, 5, 10, 20]
# np.arange(1.25, 30, 1.25) yields 23 values (1.25 ... 28.75); they are paired
# with 23 `None` effects below, i.e. they add x categories without data.
# NOTE(review): presumably this spaces the categorical x axis evenly.
angles += list(np.arange(1.25, 30, 1.25))
# The final 20 rows carry the per-row means (raw_half.mean(1)) under the
# x value 30.
angles += 20 * [30]
effect = list(raw_half.ravel()) + 23 * [None] + list(raw_half.mean(1))
data = DataFrame(data={'angles': angles, 'effect': effect})

# Individual values as small triangles, plus a point estimate with capped
# error bars for each x category.
sns.swarmplot('angles', 'effect', data=data, palette=[c3], marker='^', size=3)
sns.pointplot('angles', 'effect', data=data, color=c3, join=False,
              markers ='^', scale=.1, capsize=.6, errwidth=1)

# Hollow (white-filled) triangles drawn on top (zorder=100): the column means
# at x positions 0, 1, 3, 7, 15 and the grand mean at x position 23.
plt.plot([0, 1, 3, 7, 15], raw_half.mean(0), marker='^', mfc='w', mec=c3,
         markersize=ms, lw=0, zorder=100)
plt.plot([23], raw_half.mean(0).mean(0), marker='^', mfc='w', mec=c3,
         markersize=ms, lw=0, zorder=100)

# Tick positions are set here; the tick labels given here are replaced
# immediately afterwards by `xlabels + ['Mean']` (xlabels is defined
# elsewhere in the file).
plt.xticks([0, 1, 3, 7, 15, 23], [1.25, 2.5, 5, 10, 20, 25])
plt.gca().set_xticklabels(xlabels + ['Mean'])
plt.xlabel(u'Auditory Separation (°)')
plt.ylabel('Performance Improvement \n (% correct)')
plt.xlim([-1, 25])

# Final layout: hide the top/right spines and fix the margins by hand
# (subplots_adjust runs after tight_layout and overrides its margins).
plt.tight_layout()
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.subplots_adjust(left=0.175, right=1, top=0.95, bottom=0.2)
def contrastplot_test(
        data, x, y, idx=None,
        alpha=0.75,
        axis_title_size=None,
        barWidth=5,
        contrastShareY=True,
        contrastEffectSizeLineStyle='solid',
        contrastEffectSizeLineColor='black',
        contrastYlim=None,
        contrastZeroLineStyle='solid',
        contrastZeroLineColor='black',
        effectSizeYLabel="Effect Size",
        figsize=None,
        floatContrast=True,
        floatSwarmSpacer=0.2,
        heightRatio=(1, 1),
        idcol=None,
        lineWidth=2,
        legend=True,
        legendFontSize=14,
        legendFontProps={},
        paired=False,
        pal=None,
        rawMarkerSize=8,
        rawMarkerType='o',
        reps=3000,
        showGroupCount=True,
        show95CI=False,
        showAllYAxes=False,
        showRawData=True,
        smoothboot=False,
        statfunction=None,
        summaryBar=False,
        summaryBarColor='grey',
        summaryBarAlpha=0.25,
        summaryColour='black',
        summaryLine=True,
        summaryLineStyle='solid',
        summaryLineWidth=0.25,
        summaryMarkerSize=10,
        summaryMarkerType='o',
        swarmShareY=True,
        swarmYlim=None,
        tickAngle=45,
        tickAlignment='right',
        violinOffset=0.375,
        violinWidth=0.2,
        violinColor='k',
        xticksize=None,
        yticksize=None,
        **kwargs):
    '''Takes a pandas dataframe and produces a contrast plot:
    either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot.

    Each group of levels in `idx` gets a swarmplot of the raw data plus a
    contrast axes showing the bootstrapped difference of every level against
    the first level of its group. With exactly two levels per group and
    `floatContrast` true, the contrast axes floats to the right of the swarm
    (twin y-axis); otherwise it is drawn below the swarm.

    Parameters
    ----------
    data : pandas.DataFrame
        The raw data.
    x, y : string
        Column names: `x` holds the group levels, `y` the values.
    idx : tuple, optional
        Either a tuple of level names (one plot), or a tuple of tuples/lists
        of level names (one plot per inner group). When None, all levels of
        `data[x]` (sorted) are compared against the first one; if `paired`
        is true only the first two sorted levels are used.
    floatContrast : bool, optional
        Requested layout; forced to False whenever a group has more than two
        levels.
    swarmYlim, contrastYlim : pair, optional
        Explicit y-limits for the swarm / contrast axes. Without `swarmYlim`
        the range of the selected groups is used. Both are padded by 10% of
        the swarm range.
    statfunction : callable, optional
        Summary statistic; defaults to `np.mean`.
    reps, smoothboot :
        Forwarded to `bootstrap_contrast`.
    pal : optional
        Palette for the swarmplot. When None, a palette is built from
        `sns.color_palette` keyed by the `hue` column (if given in kwargs)
        or by `x`.
    summaryBar, summaryLine, show95CI, showRawData, showGroupCount,
    showAllYAxes, contrastShareY, legend : bool, optional
        Toggle the corresponding plot elements. `summaryBar` takes
        precedence over `summaryLine`; `summaryLine` is disabled for
        floating contrast plots.
    legendFontProps : dict, optional
        Extra keyword arguments for the legend call. Never mutated here.
    effectSizeYLabel, heightRatio, idcol, swarmShareY :
        Accepted for interface compatibility; not used by this function.
    **kwargs
        Forwarded to `sns.swarmplot`.

    The remaining parameters are styling values passed to the matplotlib /
    seaborn / helper calls that bear their names.

    Returns
    -------
    fig : the matplotlib figure.
    contrastList : pandas.DataFrame with one column per comparison, named
        "<level> vs. <reference level>".

    Raises
    ------
    TypeError
        If `data` is not a pandas DataFrame, or `idx` mixes plain strings
        with tuples.

    Notes
    -----
    Matplotlib rc settings and the seaborn style are changed while plotting
    and reset to their defaults (`rcdefaults()`, `sns.set()`) before
    returning.
    '''
    # Check that `data` is a pandas dataframe
    if not isinstance(data, pd.DataFrame):
        raise TypeError(
            "The object passed to the command is not a pandas DataFrame. "
            "Please convert it to a pandas DataFrame.")

    # Get and set levels of data[x].
    # After this section `tuple_in` is ALWAYS a sequence of groups, each
    # group being a sequence of level names.
    if idx is None:
        widthratio = [1]
        allgrps = np.sort(data[x].unique())
        if paired:
            # If `idx` is not specified, just take the FIRST TWO levels
            # alphabetically. The pair must be wrapped in an outer tuple,
            # otherwise the plotting loop would iterate over the two level
            # names themselves instead of over one group of two levels.
            tuple_in = (tuple(allgrps[0:2]),)
        else:
            # No idx is given, so all groups are compared to the first one
            # in the DataFrame column.
            tuple_in = (tuple(allgrps),)
            if len(allgrps) > 2:
                floatContrast = False
    elif all(isinstance(element, str) for element in idx):
        # idx is supplied but not a multiplot (ie single list or tuple)
        tuple_in = (idx,)
        widthratio = [1]
        if len(idx) > 2:
            floatContrast = False
    elif all(isinstance(element, (tuple, list)) for element in idx):
        # idx is a list/tuple of tuples or lists: we have a multiplot!
        tuple_in = idx
        if any(len(element) > 2 for element in tuple_in):
            # Any group with more than 2 levels cannot float.
            floatContrast = False
        # Make sure the widthratio of the separate multiplots corresponds
        # to how many groups there are in each one.
        widthratio = [len(element) for element in tuple_in]
    else:
        raise TypeError(
            "The object passed to `idx` consists of a mixture of single "
            "strings and tuples. Please make sure that `idx` is either a "
            "tuple of column names, or a tuple of tuples for plotting.")

    # initialise statfunction
    if statfunction is None:
        statfunction = np.mean

    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol = kwargs['hue']
            colGrps = data[colorCol].unique()
            nColors = len(colGrps)
        else:
            colorCol = x
            colGrps = data[x].unique()
            nColors = len([element for tupl in tuple_in for element in tupl])
        plotPal = dict(zip(colGrps, sns.color_palette(n_colors=nColors)))
    else:
        plotPal = pal

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar = True
        summaryLine = False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine = False

    if swarmYlim is None:
        # Get range of _selected groups_. Iterate `tuple_in` rather than
        # `idx`: `idx` is None by default and cannot be iterated.
        selected = np.unique(
            [level for group in tuple_in for level in group])
        tempdat = data[data[x].isin(selected)]
        swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])])
    else:
        swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]])

    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])

    # Not sure why have to reduce the barwidth by this much!
    # (float divisor so the result is not truncated to 0 on Python 2.)
    barWidth = barWidth / 1000.
    if showRawData is True:
        maxSwarmSpan = 0.25
    else:
        maxSwarmSpan = barWidth

    # Expand the ylim in both directions by 10% of the swarm range.
    swarmrange = swarm_ylim[1] - swarm_ylim[0]
    pad = 0.1 * swarmrange
    swarm_ylim = np.array([swarm_ylim[0] - pad, swarm_ylim[1] + pad])

    # plot params
    if axis_title_size is None:
        axis_title_size = 25
    if yticksize is None:
        yticksize = 18
    if xticksize is None:
        xticksize = 18

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams = {'labelsize': axis_title_size}
    xtickParams = {'labelsize': xticksize}
    ytickParams = {'labelsize': yticksize}
    svgParams = {'fonttype': 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams)

    if figsize is None:
        if len(tuple_in) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (8, (8 / np.sqrt(2)))

    # Initialise figure, taking into account desired figsize.
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in`:
    # 1 row; one column per group of levels.
    gsMain = gridspec.GridSpec(
        1, len(tuple_in),
        width_ratios=widthratio,
        wspace=0)

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        # Work on a copy so the caller's DataFrame is never written to, and
        # order the levels as given in `current_tuple`. (Assigning the result
        # of set_categories works on pandas versions with and without the
        # removed `inplace` argument.)
        plotdat = data[data[x].isin(current_tuple)].copy()
        plotdat[x] = plotdat[x].astype("category").cat.set_categories(
            list(current_tuple), ordered=True)

        # Drop all nans.
        plotdat = plotdat.dropna()

        # Calculate summaries.
        summaries = plotdat.groupby([x], sort=True)[y].apply(statfunction)

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            # Create subGridSpec with 2 rows and 1 column.
            subGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx], wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig, subGridSpec[1, 0],
                                      sharex=ax_raw, frame_on=False)

        # Calculate the bootstrapped contrast of every level against the
        # first level of the group.
        bscontrast = list()
        for i in range(1, len(current_tuple)):
            # Note that you start from one. No need to do auto-contrast!
            tempbs = bootstrap_contrast(
                data=data,
                x=x,
                y=y,
                idx=[current_tuple[0], current_tuple[i]],
                statfunction=statfunction,
                smoothboot=smoothboot,
                reps=reps)
            bscontrast.append(tempbs)
            contrastList.append(tempbs)
            contrastListNames.append(
                current_tuple[i] + ' vs. ' + current_tuple[0])

        #### PLOT RAW DATA.
        if showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            ax_raw.set_ylim(swarm_ylim)
            sw = sns.swarmplot(
                data=plotdat,
                x=x, y=y,
                order=current_tuple,
                ax=ax_raw,
                alpha=alpha,
                palette=plotPal,
                size=rawMarkerSize,
                marker=rawMarkerType,
                **kwargs)

        if summaryBar is True:
            bar_raw = sns.barplot(
                x=summaries.index.tolist(),
                y=summaries.values,
                facecolor=summaryBarColor,
                ax=ax_raw,
                alpha=summaryBarAlpha)

        if floatContrast:
            # Get horizontal offset values.
            maxXBefore = max(sw.collections[0].get_offsets().T[0])
            minXAfter = min(sw.collections[1].get_offsets().T[0])
            xposAfter = maxXBefore + floatSwarmSpacer
            xAfterShift = minXAfter - xposAfter
            # shift the swarmplots
            offsetSwarmX(sw.collections[1], -xAfterShift)

            # The bars only exist when summaryBar is on; without this guard
            # `bar_raw` would be an undefined name.
            if summaryBar is True:
                ## get swarm with largest span, set as max width of each
                ## barplot.
                for i, bar in enumerate(bar_raw.patches):
                    x_width = bar.get_x()
                    width = bar.get_width()
                    centre = x_width + (width / 2.)
                    if i == 0:
                        bar.set_x(centre - maxSwarmSpan / 2.)
                    else:
                        bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                    bar.set_width(maxSwarmSpan)

            ## Set the ticks locations for ax_raw.
            ax_raw.xaxis.set_ticks((0, xposAfter))
            firstTick = ax_raw.xaxis.get_ticklabels()[0].get_text()
            secondTick = ax_raw.xaxis.get_ticklabels()[1].get_text()
            ax_raw.set_xticklabels([firstTick, secondTick],
                                   rotation=tickAngle,
                                   horizontalalignment=tickAlignment)

        if summaryLine is True:
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i - summaryLineWidth, i + summaryLineWidth),  # x-coords
                    (m, m),
                    color=summaryColour,
                    linestyle=summaryLineStyle)

        if show95CI is True:
            sns.barplot(
                data=plotdat,
                x=x, y=y,
                ax=ax_raw,
                alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple) == 2:
            # Plot the CIs on the contrast axes.
            plotbootstrap(sw.collections[1],
                          bslist=tempbs,
                          ax=ax_contrast,
                          violinWidth=violinWidth,
                          violinOffset=violinOffset,
                          markersize=summaryMarkerSize,
                          marker=summaryMarkerType,
                          offset=floatContrast,
                          color=violinColor,
                          linewidth=1)
            if floatContrast:
                # Set reference lines
                ## First get leftmost limit of left reference group
                xtemp, _ = np.array(sw.collections[0].get_offsets()).T
                leftxlim = xtemp.min()
                ## Then get leftmost limit of right test group
                xtemp, _ = np.array(sw.collections[1].get_offsets()).T
                rightxlim = xtemp.min()

                ## zero line
                ax_contrast.hlines(
                    0,                 # y-coordinates
                    leftxlim, 3.5,     # x-coordinates, start and end.
                    linestyle=contrastZeroLineStyle,
                    linewidth=0.75,
                    color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(
                    tempbs['summary'],
                    rightxlim, 3.5,    # x-coordinates, start and end.
                    linestyle=contrastEffectSizeLineStyle,
                    linewidth=0.75,
                    color=contrastEffectSizeLineColor)

                ## Shift the right axis by the magnitude of the effect size.
                ## `ylim - es` for es > 0 and `ylim + es` for es < 0 are both
                ## `ylim - abs(es)`; writing it this way also covers an
                ## effect size of exactly 0, which used to leave the limits
                ## undefined.
                es = float(tempbs['summary'])
                raw_low, raw_high = ax_raw.get_ylim()
                ax_contrast.set_ylim(raw_low - abs(es), raw_high - abs(es))

                if gsIdx > 0:
                    ax_contrast.set_ylabel('')

                align_yaxis(ax_raw, tempbs['statistic_ref'], ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)

                # Set xlims so everything is properly visible!
                swarm_xbounds = ax_raw.get_xbound()
                ax_contrast.set_xbound(
                    swarm_xbounds[0] - (summaryLineWidth * 1.1),
                    swarm_xbounds[1] + (summaryLineWidth * 1.1))

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(
                bslist=bscontrast,
                ax=ax_contrast,
                violinWidth=violinWidth,
                violinOffset=violinOffset,
                markersize=summaryMarkerSize,
                marker=summaryMarkerType,
                linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx > 0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame: one column per comparison.
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Axes alternate swarm, contrast, swarm, contrast, ... in the figure.
    axesCount = len(fig.get_axes())

    # Group sizes for the tick labels; the same for every axes.
    if showGroupCount:
        count = data.groupby(x).count()[y]

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx = fig.axes[i]

        if i != axesCount - 2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(False)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(Line2D(
                (axx.xaxis.get_view_interval()[0],
                 axx.xaxis.get_view_interval()[1]),
                (0, 0),
                color='black', linewidth=0.75))

        # I don't know why the swarm axes controls the contrast axes
        # ticks....
        if showGroupCount:
            newticks = list()
            for t in axx.xaxis.get_ticklabels():
                t_text = t.get_text()
                newticks.append(t_text + ' n=' + str(count[t_text]))
            axx.xaxis.set_ticklabels(newticks)

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i == axesCount - 2:  # the last (rightmost) swarm axes.
                # 'upper right' is the valid matplotlib spelling; the former
                # 'top right' is not a recognised legend location.
                axx.legend(loc='upper right',
                           bbox_to_anchor=(1.1, 1.0),
                           fontsize=legendFontSize,
                           **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    for j, i in enumerate(range(1, axesCount, 2)):
        axx = fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright = axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                       xmin=xleft - 1,
                       xmax=xright + 1,
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes.
                    # Not entirely sure why I have to do this.
                    drawback_y(axx)

            sns.despine(ax=axx,
                        top=True, right=True,
                        left=False, bottom=False,
                        trim=True)

            # Rotate tick labels.
            rotateTicks(axx, tickAngle, tickAlignment)

        else:
            # Re-draw the floating axis to the correct limits.
            # The j-th column is selected by position and its rows by label;
            # this replaces the removed mixed-mode `.ix` indexer.
            contrast_j = contrastList.iloc[:, j]
            lower = np.min(contrast_j['diffarray'])
            upper = np.max(contrast_j['diffarray'])
            meandiff = contrast_j['summary']

            ## Make sure we have zero in the limits.
            if lower > 0:
                lower = 0.
            if upper < 0:
                upper = 0.

            ## Get the tick interval from the left y-axis.
            leftticks = fig.get_axes()[i - 1].get_yticks()
            tickstep = leftticks[1] - leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1 = axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = [tick for tick in newticks1
                         if lower <= tick <= upper]

            # If the meandiff falls outside of the newticks2 set, add a tick
            # in the right direction.
            if np.max(newticks2) < meandiff:
                # find out the max tick index in newticks1.
                ind = np.where(newticks1 == np.max(newticks2))[0][0]
                newticks2.append(newticks1[ind + 1])
            elif meandiff < np.min(newticks2):
                # find out the min tick index in newticks1.
                ind = np.where(newticks1 == np.min(newticks2))[0][0]
                newticks2.append(newticks1[ind - 1])
            newticks2 = np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine the axes.
            sns.despine(ax=axx, trim=True,
                        bottom=False, right=False,
                        left=True, top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings
    # hub-and-spoke plots. The limits are derived inside the helper from
    # `contrastYlim`; nothing needs to be pre-computed here.
    if (axesCount > 2 and
            contrastShareY is True and
            floatContrast is False):
        normalizeContrastY(fig,
                           contrast_ylim=contrastYlim,
                           show_all_yaxes=showAllYAxes)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're all done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList