def plot_summary_stat_boxplots_by_exp_groups(
    arr_df, arr_summary_stats, sample_inds=None, fig_path=None, fig_prefix=None, fig_size=(11, 11)
):
    """
    Plot boxplots of summary stats by experimental groups.

    One figure is created per entry of ``arr_summary_stats``. Each figure
    shows that column of ``arr_df`` split by the values of the ``"group"``
    column, restricted to the rows selected by ``sample_inds``.

    Parameters:
    ----------
    arr_df: pandas.DataFrame
        dataframe of array data. Must contain a ``"group"`` column and one
        column per entry of ``arr_summary_stats``.
    arr_summary_stats: list
        list of array summary statistics (column names of ``arr_df``) for
        which boxplots are to be generated.
    sample_inds: [bool array | None (default)]
        defines which samples (rows) to use in boxplots (allows selecting
        a subset). If set to None (default) will use all samples.
    fig_path: [string | None (default)]
        if set, figures are saved to files whose names start with this
        string (it is concatenated as-is, so include any trailing path
        separator). If None (default) figures are not saved.
    fig_prefix: [string | None]
        prefix used in the figure title and in the saved file name, e.g.
        when only a subset of the data is plotted. None is treated as "".
    fig_size : 2-tuple of integers
        default set to (11, 11)
        (width, height) of figure in inches.

    Returns:
    -------
    None. Figures are left open in matplotlib's pyplot state.
    """
    if sample_inds is None:
        sample_inds = [True] * arr_df.shape[0]

    if fig_prefix is None:
        fig_prefix = ""

    # The row selection does not depend on the statistic being plotted,
    # so apply the mask once instead of twice per figure.
    subset_df = arr_df[sample_inds]

    for assay in arr_summary_stats:
        f = plt.figure()
        f.set_size_inches(fig_size)
        mbp.myboxplot_by_labels(subset_df[assay], subset_df["group"])
        plt.title("".join([fig_prefix, " ", assay, "_responses"]))
        plt.xlabel("group #")

        # save to file only if an output location was given
        if fig_path is not None:
            f.set_tight_layout(True)
            filename = "".join([fig_path, fig_prefix, assay, "_boxplots_by_groups.png"])
            f.savefig(filename, dpi=200)
def plot_summary_stat_boxplots_by_clusters(
    arr_df, clusters, prot_names, arr_summary_stats, sample_inds=None, fig_prefix=None, fig_path=None, fig_size=(11, 11)
):
    """
    Plot boxplots of summary stats by clusters.

    One figure is created for every (protein, summary statistic) pair. Each
    figure shows the statistic's column of ``arr_df`` (restricted to the
    rows selected by ``sample_inds``) split by ``clusters[protein]``.

    Parameters:
    ----------
    arr_df: pandas.DataFrame
        dataframe of array data. Must contain one column per entry of
        ``arr_summary_stats``.
    clusters: dictionary
        cluster labels per protein; ``clusters[p]`` is looked up for every
        ``p`` in ``prot_names`` and used as the boxplot grouping labels.
    prot_names: list
        list of strings of protein antigens (keys of ``clusters``).
    arr_summary_stats: list
        list of array summary statistics (column names of ``arr_df``) for
        which boxplots are to be generated.
    sample_inds: [bool array | None]
        boolean array specifying which samples (rows) to use.
        If None (default) will plot all.
    fig_prefix: [string | None (default)]
        prefix inserted into the saved file name right after ``fig_path``.
        None is treated as "".
    fig_path: [string | None (default)]
        if set, figures are saved to files whose names start with this
        string (it is concatenated as-is, so include any trailing path
        separator). If None (default) figures are not saved.
    fig_size : 2-tuple of integers
        default set to (11, 11)
        (width, height) of figure in inches.

    Returns:
    -------
    None. Figures are left open in matplotlib's pyplot state.
    """
    if sample_inds is None:
        sample_inds = [True] * arr_df.shape[0]

    # fig_prefix defaults to None, which cannot be passed to str.join();
    # normalise it so saving works without an explicit prefix.
    if fig_prefix is None:
        fig_prefix = ""

    # The row selection is the same for every figure, so apply it once.
    subset_df = arr_df[sample_inds]

    for p in prot_names:
        # NOTE(review): the cluster labels are not filtered by sample_inds;
        # presumably callers pass labels that already match the selected
        # rows - confirm against callers.
        cluster_labels = clusters[p]
        num_clusters = len(np.unique(cluster_labels))

        for assay in arr_summary_stats:
            f = plt.figure()
            f.set_size_inches(fig_size)
            mbp.myboxplot_by_labels(subset_df[assay], cluster_labels)
            plt.title("".join([p, " clusters ", assay]))
            plt.xlabel("Cluster #")

            # save to file only if an output location was given
            if fig_path is not None:
                filename = "".join(
                    [fig_path, fig_prefix, p, "_", assay, "_boxplots_by_clusters_n_", str(num_clusters), ".png"]
                )
                f.savefig(filename, dpi=200)
f.set_tight_layout(True)
f.set_size_inches(18, 11)

# One panel per protein: responses stored in bg_df for that protein's
# columns (ind_dict[p]), one line per row of bg_df.
# NOTE(review): the original indentation of this fragment was lost; the
# save is placed inside the loop, so one file is written per protein name
# and the last one holds both panels - confirm this matches the intent.
for i, p in enumerate(['SHA_ha', 'SHA_na']):
    axarr[i].plot(np.arange(len(ind_dict[p])), bg_df[ind_dict[p]].T)
    # bg_df.shape[0] is already the row count; wrapping it in len() raised
    # "TypeError: object of type 'int' has no len()".
    axarr[i].set_title(p + " BSA responses " + str(i+1) + " (n = " + str(bg_df.shape[0]) + ")")
    # axarr[i].set_yticks([])
    filename = "".join([FIG_PATH, p, "_BSA_responses.png"])
    f.savefig(filename, dpi=20)

# plot boxplots of all clusters: one figure per (protein, assay) pair
for p in ['SHA_ha', 'SHA_na']:
    for assay in assays:
        f = figure()
        f.set_size_inches(18, 11)
        mbp.myboxplot_by_labels(arr_df[post_inds][assay], clusters[p])
        plt.title("".join([p, " clusters ", assay]))
        plt.xlabel('Cluster #')
        filename = "".join([FIG_PATH, p, "_", assay, "_boxplots_by_clusters_n_", str(num_clusters), ".png"])
        f.savefig(filename, dpi=200)

# plot boxplots of all groups: one figure per (assay or summary stat,
# time point) pair, rows selected by the time point's mask in time_dict
for assay in assays + arr_summary_stats:
    for t in time_dict:
        f = figure()
        f.set_size_inches(18, 11)
        # select the time point's rows once and reuse them for data and labels
        time_df = arr_df[time_dict[t]]
        mbp.myboxplot_by_labels(time_df[assay], time_df['group'])
        plt.title("".join([t, "_", assay, "_responses"]))
        plt.xlabel('group #')
        filename = "".join([FIG_PATH, t, "_", assay, "_boxplots_by_groups.png"])
        f.savefig(filename, dpi=200)
# Draw one left-oriented dendrogram per protein from its linkage in
# Z_struct, labelling the leaves with the index of arr_df.
for p in ("SHA_ha", "SHA_na"):
    f, axarr = plt.subplots(1)
    f.set_size_inches(18, 11)
    f.set_tight_layout(True)
    # color_threshold=np.inf: no link lies above the threshold
    sch.dendrogram(
        Z_struct[p],
        color_threshold=np.inf,
        labels=arr_df.index,
        orientation="left",
    )
    axarr.set_title(p)
    filename = FIG_PATH + p + "_dendrograms.png"
    f.savefig(filename, dpi=200)

# plot boxplots of all clusters: one figure per (protein, summary stat)
for p in ("SHA_ha", "SHA_na"):
    for sum_stat in arr_summary_stats:
        f = plt.figure()
        f.set_size_inches(18, 11)
        mbp.myboxplot_by_labels(arr_df[sum_stat], clusters[p])
        plt.title(p + " clusters " + sum_stat)
        plt.xlabel("Cluster #")
        filename = (
            FIG_PATH + p + "_" + sum_stat
            + "_boxplots_by_clusters_n_" + str(num_clusters) + ".png"
        )
        f.savefig(filename, dpi=200)

# Disabled: boxplots of all groups per time point.
# # plot boxplots of all groups:
# for assay in assays + arr_summary_stats:
#     for t in time_dict.keys():
#         f = figure()
#         f.set_size_inches(18, 11)
#         mbp.myboxplot_by_labels(arr_df[time_dict[t]][assay], arr_df[time_dict[t]]['group'])
#         plt.title("".join([t, "_", assay, "_responses"]))
#         plt.xlabel('group #')
#         filename = "".join([FIG_PATH, t, "_", assay, "_boxplots_by_groups.png"])
#         f.savefig(filename, dpi=200)
# Remove the helper column in place, then show the table and write it out
# as a tab-separated file.
diff_df.drop("modified", axis=1, inplace=True)
print(diff_df)
diff_df.to_csv(filename, sep="\t")

# now translate indices into cluster inds: - not used since code modified to include tuple of indices into dataframe as index!
# c_inds = np.unravel_index(sig_df_HA.index,(num_clusters, num_clusters-1))

print("Vic HA clusters with significant differences in HAI or NT assays")
print(sig_df_HA)

# plot boxplots of all clusters: one figure per (protein, assay) pair,
# using the assay's display string in both the title and the file name
for p in ("Vic_HA", "Vic_NA"):
    for assay in Vic_assays:
        f = figure()
        mbp.myboxplot_by_labels(df[assay], clusters[p])
        plt.title(p + " clusters " + assay_strs[assay])
        plt.xlabel("Cluster #")
        filename = (
            FIG_PATH + p + "_" + assay_strs[assay]
            + "_boxplots_by_clusters_n_" + str(num_clusters) + ".png"
        )
        f.savefig(filename, dpi=200)

# Plot figures for a given clustering solution - currently only performed for the H3N2 victoria strain:
for p in ("Vic_HA", "Vic_NA"):
    f, axarr = plt.subplots(num_clusters, 1)
    f.set_size_inches(18, 11)
    f.set_tight_layout(True)

    # plot clusters: panel i shows the rows whose label equals i + 1
    # (labels are compared against 1-based cluster numbers)
    for i in np.arange(num_clusters):
        axarr[i].plot(
            np.arange(len(ind_dict[p])),
            df[ind_dict[p]].loc[clusters[p] == i + 1].T,
        )
# plot boxplots of all groups:
# only plot for PBS, Vac, and AS03, i.e. drop every MF59 group
curr_df = arr_df[~arr_df.group.isin(['WT_post_MF59', 'Ob_post_MF59', 'WT_pre_MF59', 'Ob_pre_MF59'])]

# boolean row masks over curr_df, keyed by time point
curr_time_dict = {
    'Pre': curr_df.group.str.contains('pre'),
    'Post': curr_df.group.str.contains('post'),
}

for assay in arr_summary_stats:
    for t in ['Post']:  # time_dict.keys():
        f, axarr = plt.subplots(1)
        f.set_tight_layout(True)
        mbp.myboxplot_by_labels(curr_df[curr_time_dict[t]][assay], curr_df[curr_time_dict[t]]['group'])
        axarr.set_title(t + " " + assay.replace('_', ' ') + " responses", fontsize=16)
        axarr.tick_params(axis='both', which='major', labelsize=14)
        axarr.set_yscale('log')

        # save the same figure twice: PNG at dpi=200, then EPS at dpi=1000
        filename = FIG_PATH + t + "_" + assay + "_boxplots_by_groups.png"
        f.savefig(filename, dpi=200)
        filename = FIG_PATH + t + "_" + assay + "_boxplots_by_groups.eps"
        f.savefig(filename, dpi=1000)

# Figure 4, 5 and 6 - clustering dendrograms, median responses and summary stats of WT vs. Obese for each group:
for a in ['Vac', 'AS03']:
    # obese post-vaccination indices first, followed by the wild-type ones
    curr_inds = group_inds['Ob_post_' + a].append(group_inds['WT_post_' + a])
    amp.plot_clustering_dendrograms(
        Z_struct=Z_struct[a],
        prot_names=['SHA_ha'],
        labels=arr_df.loc[curr_inds].group,
        fig_prefix=a + '_',
        fig_path=FIG_PATH,
    )