def add_stats(df, variable1, variable2, ax, order):
    """Annotate ``ax`` with precomputed Dunn post-hoc p-values.

    Three comparisons are drawn: ``variable1`` vs ``variable2`` and each of
    them vs the ``"control"`` group.  The p-values are looked up in the table
    returned by ``dunn_posthoc_test`` and handed to ``add_stat_annotation``
    with ``perform_stat_test=False``, so no test is re-run while annotating.

    NOTE(review): ``y_variable``, ``x_variable``, ``x`` and ``y`` are not
    parameters of this function; they have to exist as module-level names
    when it is called -- confirm against the rest of the file.

    Args:
        df: Data passed both to the post-hoc test and to the annotation.
        variable1: Label of the first non-control group.
        variable2: Label of the second non-control group.
        ax: Axes that already holds the plot to annotate.
        order: Category order used by the plot.
    """
    posthoc_table = dunn_posthoc_test(df, y_variable, x_variable)

    # Comparisons to draw, in display order.
    box_pairs = [
        (variable1, variable2),
        (variable1, "control"),
        (variable2, "control"),
    ]

    # One p-value per comparison; each pair is echoed as it is looked up.
    p_values = []
    for first, second in box_pairs:
        print((first, second))
        p_values.append(posthoc_table.loc[first, second])

    annotation_options = dict(
        data=df,
        x=x,
        y=y,
        order=order,
        box_pairs=box_pairs,
        text_format="star",
        loc="outside",
        verbose=2,
        perform_stat_test=False,
        pvalues=p_values,
        test_short_name="Dunn",
    )
    add_stat_annotation(ax, **annotation_options)
def squat_plots(df_in, var_in, new_ylabel='Same'):
    """Point-plot ``var_in`` per ``Stat``/``Mode`` with significance marks.

    One marker per ``Stat`` category and ``Mode`` is drawn with ``ci='sd'``
    error bars, then the ``'b'`` vs ``'p'`` modes are compared with a
    Mann-Whitney test (no multiple-comparison correction) for the ``Avg``,
    ``Max``, ``25%`` and ``75%`` categories.  ``Min`` and ``Std`` are
    deliberately not compared.

    Args:
        df_in: Long-form DataFrame with ``Stat`` and ``Mode`` columns plus
            the ``var_in`` column.
        var_in: Name of the column plotted on the y axis.
        new_ylabel: Replacement y-axis label.  The default ``'Same'`` keeps
            the label seaborn derived from ``var_in`` (previously the literal
            text "Same" was written onto the axis).

    Returns:
        The Axes returned by ``sns.pointplot``.
    """
    compared_stats = ('Avg', 'Max', '25%', '75%')
    boxpairs = [((stat, 'b'), (stat, 'p')) for stat in compared_stats]

    plot_out = sns.pointplot(x='Stat', y=var_in, hue='Mode', join=False,
                             dodge=0.25, data=df_in, split=True,
                             palette="dark", ci='sd')
    add_stat_annotation(plot_out, data=df_in, x='Stat', y=var_in, hue='Mode',
                        box_pairs=boxpairs, test='Mann-Whitney',
                        comparisons_correction=None, text_format='star',
                        loc='inside', verbose=1,
                        pvalue_thresholds=[[1e-4, "**"], [1e-3, "**"],
                                           [1e-2, "**"], [0.05, "*"],
                                           [1, ""]])

    # 'Same' is the sentinel for "leave the automatic label alone".
    if new_ylabel != 'Same':
        plot_out.set_ylabel(new_ylabel)
    plot_paper_params()
    return plot_out
def foot_plots(df_in, var_in):
    """Point-plot ``var_in`` per foot measurement, split by ``mode``.

    The x axis uses the column whose name is a single space (``' '``).  For
    each of the four measurements the ``'b'`` and ``'p'`` modes are compared
    with a Mann-Whitney test (no multiple-comparison correction), and the
    tick labels are rotated by 90 degrees at the end.

    Args:
        df_in: Long-form DataFrame with ``' '`` and ``mode`` columns plus the
            ``var_in`` column.
        var_in: Name of the column plotted on the y axis.
    """
    measurements = ('Ball-Ball Dist.', 'Heel-Heel Dist.',
                    'Load Line L', 'Load Line R')
    boxpairs = [((name, 'b'), (name, 'p')) for name in measurements]

    # Arguments shared by the plot and its annotation.
    shared = dict(x=' ', y=var_in, hue='mode', data=df_in)

    plot_out = sns.pointplot(join=False, dodge=0.25, split=True,
                             palette="dark", ci='sd', **shared)
    add_stat_annotation(
        plot_out,
        box_pairs=boxpairs,
        test='Mann-Whitney',
        comparisons_correction=None,
        text_format='star',
        loc='inside',
        verbose=1,
        pvalue_thresholds=[[1e-4, "**"], [1e-3, "**"], [1e-2, "**"],
                           [0.05, "*"], [1, ""]],
        **shared,
    )
    plot_paper_params()
    plt.xticks(rotation=90)
def draw_violin(input_file, output_file, watch):
    """Save a violin plot of ``watch`` per ``Classification`` with t-tests.

    The CSV at ``input_file`` is plotted with the class order taken from
    ``general.classes``; every class is compared with its immediate
    successor in that order using an independent t-test.

    Args:
        input_file: Path of the CSV to read.
        output_file: Destination of the figure; passed through
            ``general.check_exist`` before saving.
        watch: Name of the column drawn on the y axis.
    """
    frame = pandas.read_csv(input_file)
    seaborn.set(context="poster", style="whitegrid")
    fig, ax = matplotlib.pyplot.subplots(figsize=(24, 24))

    class_order = general.classes
    seaborn.violinplot(data=frame, x="Classification", y=watch,
                       order=class_order)

    # Neighbouring classes only: (c0, c1), (c1, c2), ...
    neighbour_pairs = list(zip(class_order, class_order[1:]))
    statannot.add_stat_annotation(
        ax,
        data=frame,
        x="Classification",
        y=watch,
        box_pairs=neighbour_pairs,
        test="t-test_ind",
        text_format="star",
        verbose=0,
        order=class_order,
    )

    fig.savefig(general.check_exist(output_file))
    matplotlib.pyplot.close(fig)
def en_trans_cli_plot(gene):
    """Box/strip plot of ``gene`` transcript levels, tumor vs adjacent normal.

    Joins the ``Proteomics_Tumor_Normal`` clinical column to the
    transcriptomics of ``gene`` through ``en.join_metadata_to_omics``, draws
    a box plot with the individual points on top, and annotates the
    ``Tumor`` vs ``Adjacent_normal`` comparison with an independent t-test.

    Args:
        gene: Gene name; the plotted column is ``gene + '_transcriptomics'``.
    """
    group_col = "Proteomics_Tumor_Normal"
    value_col = gene + '_transcriptomics'

    joined = en.join_metadata_to_omics(
        metadata_df_name="clinical",
        omics_df_name="transcriptomics",
        metadata_cols=group_col,
        omics_genes=gene,
    )
    # Notebook-style inspection calls; their results are discarded.
    joined.head()
    joined[group_col].unique()

    sns.set(style="white", font_scale=1.5)
    ax = sns.boxplot(x=group_col, y=value_col, data=joined, showfliers=False)
    sns.stripplot(x=group_col, y=value_col, data=joined, color='.3')
    add_stat_annotation(
        ax,
        data=joined,
        x=group_col,
        y=value_col,
        boxPairList=[("Tumor", "Adjacent_normal")],
        test='t-test_ind',
        textFormat='star',
        loc='inside',
        verbose=2,
    )
    plt.title('endometrial cancer')
def test_sig_boxplotAccuracy(self):
    """Save the accuracy comparison box plot with significance stars.

    Plots every column of ``self.acc`` as a white box with the individual
    points swarmed on top, annotates six classifier pairs with independent
    t-tests and writes ``fig6comparisonELMSVMAccuracyStats.png``.  (Earlier
    per-model variants of this figure used other y labels and file names.)
    """
    accuracies = self.acc
    compared = [
        ("single ELMs", "majority voting ELMs"),
        ("single ELMs", "bagging-based ELMs"),
        ("single ELMs", "boosting-based ELMs"),
        ("majority voting ELMs", "boosting-based ELMs"),
        ("bagging-based ELMs", "boosting-based ELMs"),
        ("boosting-based ELMs", "SVMs"),
    ]

    fig, ax3 = plt.subplots(figsize=(9, 7))
    sns.set_style("whitegrid")
    ax3 = sns.boxplot(data=accuracies, color="white")
    ax3 = sns.swarmplot(data=accuracies, color=".25")
    plt.ylabel("Accuracy Comparison")

    # statistical notation
    add_stat_annotation(
        ax3,
        data=accuracies,
        box_pairs=compared,
        test='t-test_ind',
        text_format='star',
        loc='inside',
        verbose=2,
    )
    fig.savefig('fig6comparisonELMSVMAccuracyStats.png')
def plot_clinical_status_vs_esm_params(res, output_dir):
    """Save a 2x2 grid of box plots: ESM parameters by symptomatic status.

    Only rows with ``AB_Positive == "Yes"`` are used.  Each panel compares
    ``Symptomatic`` "Yes" vs "No" for one parameter with an independent
    t-test; the figure is written to
    ``<output_dir>/clinical_status_vs_esm_params.png``.

    Args:
        res: DataFrame with ``AB_Positive``, ``Symptomatic`` and the four
            plotted parameter columns.
        output_dir: Directory that receives the PNG.
    """
    panels = [
        ("BETAS_est", "Production Rate"),
        ("DELTAS_est", "Clearance Rate"),
        ("BDR_log", "Prod/Clear Ratio (Log)"),
        ("PUP_ROI_AB_Mean", "Amyloid Beta"),
    ]
    face_pal = {"No": "cornflowerblue", "Yes": "indianred"}
    ab_positive = res[res.AB_Positive == "Yes"]

    plt.figure(figsize=(10, 13))
    for position, (param, title) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        panel_ax = sns.boxplot(x="Symptomatic", y=param, data=ab_positive,
                               palette=face_pal)
        add_stat_annotation(
            panel_ax,
            data=ab_positive,
            x="Symptomatic",
            y=param,
            box_pairs=[("Yes", "No")],
            test='t-test_ind',
            text_format='star',
            loc='inside',
            verbose=2,
            fontsize=18,
        )
        plt.ylabel("")
        plt.title(title, fontsize=22)
        plt.xticks(fontsize=18)
        plt.yticks(fontsize=18)
        plt.xlabel("Symptomatic", fontsize=18)

    plt.tight_layout()
    target = os.path.join(output_dir, "clinical_status_vs_esm_params.png")
    plt.savefig(target)
def plot_swap_acc(ax, path, parameter_configs):
    """Draw the swap-accuracy bar plot per agent on ``ax``.

    The prepared data is stored in the module-level ``data`` global (as
    before) and sorted in place by ``Agent``.

    NOTE(review): a Welch t-test annotation for the ``("DTI", "AE+MTM")``
    pair (``test="t-test_welch"``, ``line_height=0.02``,
    ``line_offset_to_box=0.04``) used to follow the ``return`` and could
    therefore never run.  The dead call was removed; behaviour is unchanged.
    Re-enable it before the ``return`` if the annotation is wanted.

    Args:
        ax: Axes to draw on.
        path: Forwarded to ``_prepare_data_swapacc``.
        parameter_configs: Forwarded to ``_prepare_data_swapacc``.

    Returns:
        Whatever ``sns.barplot`` returns (the Axes it drew on).
    """
    global data
    data = _prepare_data_swapacc(path, parameter_configs)
    data.sort_values(by="Agent", inplace=True)
    return sns.barplot(
        data=data,
        x="Agent",
        y="Swap acc.",
        ax=ax,
        edgecolor=".2",
        capsize=0.01,
        errwidth=1.5,
    )
def cat_plot(type, x, y, file_name, hue=None):
    """Save a violin or annotated box plot from the biopsy data sheet.

    The data always comes from the ``Violin Plot`` sheet of a fixed Excel
    workbook.  ``type == 'violin'`` saves a plain violin plot.
    ``type == 'box'`` saves a box plot annotated with Welch t-tests: without
    ``hue`` all pairs of x categories are compared (and the raw points are
    overlaid); with ``hue`` all pairs of hue levels are compared within each
    x category.  Any other ``type`` only creates an empty figure.

    Args:
        type: ``'violin'`` or ``'box'``.
        x: Column for the x axis.
        y: Column for the y axis.
        file_name: Output image path (saved at 200 dpi).
        hue: Optional column used for grouping within each x category.
    """
    df_cat = pd.read_excel(
        r'C:\Users\chaob\Documents\Biopsy Heterogeneity Data Sheet.xlsx',
        sheet_name='Violin Plot')
    fig, ax = plt.subplots(figsize=(5, 6))

    def _finish():
        fig.tight_layout()
        fig.savefig(file_name, dpi=200)

    if type == 'violin':
        sns.violinplot(x=x, y=y, hue=hue, palette='pastel', data=df_cat,
                       ax=ax)
        _finish()
    elif type == 'box':
        sns.boxplot(x=x, y=y, hue=hue, saturation=0.5, showfliers=False,
                    palette='pastel', data=df_cat, ax=ax)

        # Cosmetics: label spacing and font sizes.
        ax.xaxis.labelpad = 15
        ax.yaxis.labelpad = 5
        ax.set_xlabel(ax.get_xlabel(), fontsize=13)
        ax.set_ylabel(ax.get_ylabel(), fontsize=13)
        ax.tick_params(axis='both', which='major', labelsize=10.5)

        categories = [tick.get_text() for tick in ax.get_xticklabels()]
        if hue is None:
            box_pairs = list(itertools.combinations(categories, 2))
            sns.stripplot(x=x, y=y, hue=hue, s=3, data=df_cat, alpha=0.6,
                          palette='tab10', ax=ax)
        else:
            levels = df_cat[hue].unique().tolist()
            level_pairs = list(itertools.combinations(levels, 2))
            # Compare hue levels only within the same x category.
            box_pairs = [
                ((category, left), (category, right))
                for category in categories
                for left, right in level_pairs
            ]

        add_stat_annotation(ax, data=df_cat, x=x, y=y, hue=hue,
                            box_pairs=box_pairs, perform_stat_test=True,
                            test='t-test_welch', loc='inside', verbose=0,
                            no_ns=True, fontsize='large')
        _finish()
def main(dname, out_dir):
    """Summarise and plot ROC-AUC per method from ``measures.csv``.

    Prints per-method counts, medians and standard deviations, then writes
    two PDFs into ``out_dir``: an aggregated box/strip plot with Wilcoxon
    annotations (``dce`` vs ``cor`` and ``dce`` vs ``pcor``) and a plot
    stratified by treatment and perturbed gene with a dashed line at 0.5.

    Args:
        dname: Directory (path object) containing ``measures.csv``.
        out_dir: Output directory (path object); created if missing.
    """
    # prepare
    out_dir.mkdir(parents=True, exist_ok=True)
    df = pd.read_csv(dname / 'measures.csv')

    # print statistics
    print(df.groupby('method').count())
    auc_by_method = df.groupby(['method'])['roc_auc']
    print(auc_by_method.median())
    print(df.groupby(['method'])['roc_auc'].std())

    # aggregated plot
    method_order = ['dce', 'cor', 'pcor']
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(data=df, x='method', y='roc_auc', order=list(method_order))
    # Make the box faces translucent so the points stay visible.
    for box in ax.artists:
        red, green, blue, _alpha = box.get_facecolor()
        box.set_facecolor((red, green, blue, 0.3))
    sns.stripplot(data=df, x='method', y='roc_auc', order=list(method_order))
    statannot.add_stat_annotation(
        ax,
        data=df,
        x='method',
        y='roc_auc',
        order=list(method_order),
        box_pairs=[('dce', 'cor'), ('dce', 'pcor')],
        test='Wilcoxon',
        text_format='simple',
        loc='outside',
        verbose=2,
    )
    ax.set_xlabel('Method')
    ax.set_ylabel('ROC-AUC')
    fig.tight_layout()
    fig.savefig(out_dir / 'method_comparison.pdf')

    # stratified plot
    def _chance_level(**kwargs):
        return plt.axhline(0.5, ls='dashed', color='gray', alpha=1,
                           zorder=-1)

    grid = sns.catplot(
        data=df,
        x='method',
        y='roc_auc',
        hue='perturbed_gene',
        row='treatment',
        kind='box',
        hue_order=natsorted(df['perturbed_gene'].unique()),
        aspect=2,
    )
    grid.map(_chance_level)
    grid.set_axis_labels('Method', 'ROC-AUC')
    grid._legend.set_title('Perturbed gene(s)')
    grid.savefig(out_dir / 'method_comparison_stratified.pdf')
def plot_pubtator_clean():
    """Plot pubtator gene scores per SysID category and save the figure.

    Genes are labelled ``unknown`` by default, then ``candidate``, then
    ``known NDD``, and finally ``negative control``; later labels overwrite
    earlier ones.  The candidate group is compared with every other group
    using a Mann-Whitney test.  All input and output paths are relative to
    ``ROOT_DIR``.
    """
    def _entrez_ids(relative_path):
        table = pd.read_csv(ROOT_DIR + relative_path, usecols=["Entrez id"])
        return table["Entrez id"].to_list()

    sysid_primary = _entrez_ids("sysid/sysid_primary.csv")
    sysid_candidates = _entrez_ids("sysid/sysid_candidates.csv")
    princeton_negative = pd.read_csv(
        ROOT_DIR + "ASD_translated_to_ensembl.csv")["gene id"].to_list()
    pubtator = pd.read_csv(
        ROOT_DIR
        + "pubtator_central/gene_scores/gene_scores_p_cutoff_0,0001_clean.csv")

    # 0/1 membership flags.
    gene_ids = pubtator.gene_id
    pubtator["sys_primary"] = gene_ids.isin(sysid_primary).astype(int)
    pubtator["sys_candidate"] = gene_ids.isin(sysid_candidates).astype(int)
    pubtator["sys"] = gene_ids.isin(
        sysid_primary + sysid_candidates).astype(int)

    # Category label; the assignment order defines the precedence.
    pubtator["sys_category"] = "unknown"
    pubtator.loc[pubtator.sys_candidate == 1, "sys_category"] = "candidate"
    pubtator.loc[pubtator.sys_primary == 1, "sys_category"] = "known NDD"
    pubtator.loc[pubtator.gene_id.isin(princeton_negative),
                 "sys_category"] = "negative control"

    order = ["unknown", "negative control", "candidate", "known NDD"]
    plt.figure(figsize=(6, 6))
    ax = sns.boxplot(x="sys_category", y="gene_score", data=pubtator,
                     showfliers=False, order=order)
    add_stat_annotation(
        ax,
        data=pubtator,
        x="sys_category",
        y="gene_score",
        order=order,
        box_pairs=[("unknown", "candidate"),
                   ("candidate", "negative control"),
                   ("candidate", "known NDD")],
        test='Mann-Whitney',
        text_format='simple',
        loc='outside',
        line_offset_to_box=0.001,
        line_height=0.05,
        text_offset=2,
        verbose=2,
    )

    ax.set(ylim=(0, 1800))
    ax.set_title("pubtator gene scores")
    ax.set_xlabel("SysID category")
    ax.set_ylabel("gene score")
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.get_figure().savefig(
        ROOT_DIR + "pubtator_central/plot_pubtator_clean.png")
    plt.show()
def box_zoom(factors, sel, hue, b, color_df, palette=None, stat=False):
    """Show a per-block box plot of the trial factor of one TCA component.

    Arguments:
        factors {list} -- list of 3 arrays containing the TCA factors
        sel {scalar} -- selected component
        hue {string} -- column of color_df used to color code the trials
        b {tuple} -- color list coded for learning, list of block
            boundaries, list of first-day trials
        color_df {pandas dataframe} -- columns [Odor, Reward, Day, Behavior]
            with color codes

    Keyword Arguments:
        palette {list} -- colors for plotting (default: {['red', 'black']})
        stat {bool} -- add significance stars comparing the first two hue
            levels within each block that contains both (default: {False})
    """
    box_df = make_box_df(factors, sel, b, color_df)
    n_blocks = len(b[1])
    colors = ['red', 'black'] if palette is None else palette

    plt.rcParams['figure.figsize'] = 14, 6
    ax = sns.boxplot(x="Block", y="Trial Factor", hue=hue, data=box_df,
                     palette=colors, dodge=False, linewidth=2, fliersize=2,
                     width=.3)
    # Remember the limits before any annotation is added.
    xmin, xmax, ymin, ymax = ax.axis()

    if stat:
        first_level, second_level = box_df[hue].unique()[:2]
        for block in range(n_blocks):
            levels_in_block = box_df[hue][box_df['Block'] == block].unique()
            # A comparison needs two hue levels in the block.
            if len(levels_in_block) >= 2:
                sta.add_stat_annotation(
                    ax,
                    data=box_df,
                    x="Block",
                    y="Trial Factor",
                    hue=hue,
                    boxPairList=[((block, first_level),
                                  (block, second_level))],
                    test='t-test',
                    textFormat='star',
                    loc='inside',
                    fontsize='large',
                    lineYOffsetAxesCoord=0.05,
                    linewidth=0,
                    verbose=0,
                )

    # Restore the x range and leave 15 % headroom above the boxes.
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax * 1.15)
    plt.show()
def violin_plot(data, colname, ymax, fname):
    """Save a strip + box plot of ``colname`` per ``DE_type`` category.

    The three ``DE_type`` groups are compared pairwise with Mann-Whitney
    tests, an ``n=<count>`` label is written above each group, the y axis is
    fixed to 0-100 and labelled "GC percentage of gene", and the figure is
    saved to ``fname`` at 600 dpi.

    Args:
        data: DataFrame with a ``DE_type`` column and the ``colname`` column.
        colname: Column plotted on the y axis.
        ymax: Accepted for interface compatibility; not used.
        fname: Output file name.
    """
    sns.set_context("paper", font_scale=1.3)
    ax = sns.stripplot(x='DE_type', y=colname, data=data, color="black",
                       alpha=0.5, size=1.5, jitter=True)
    ax = sns.boxplot(x='DE_type', y=colname, data=data, palette="Blues")
    add_stat_annotation(
        ax,
        data=data,
        x='DE_type',
        y=colname,
        box_pairs=[("Higher in Illumina", "Higher in PacBio"),
                   ("Higher in Illumina", "Not DE"),
                   ("Higher in PacBio", "Not DE")],
        test='Mann-Whitney',
        text_format='star',
        loc='outside',
        verbose=2,
    )

    # Per-group sample sizes and the height at which to print them.
    count_labels = ["n=" + str(count)
                    for count in data.groupby("DE_type").size()]
    group_max = data.groupby(['DE_type'])[colname].max().dropna().values
    ticks = ax.get_xticklabels()
    for idx, _label in zip(range(len(count_labels)), ticks):
        ax.text(idx, group_max[idx] + group_max[idx] * 0.1,
                count_labels[idx], horizontalalignment='center',
                size='x-small', color='black', weight='semibold')

    ax.legend().set_visible(False)
    plt.xlabel("")
    plt.ylabel("GC percentage of gene")
    plt.ylim(0, 100)
    plt.tight_layout()
    plt.savefig(fname, dpi=600, bbox_inches='tight')
    plt.close()
def plot(self):
    """Draw the violin plot described by this object and return its Axes.

    ``self.x`` / ``self.y`` are either column names of ``self.df`` or raw
    value sequences; in the latter case a temporary frame with columns
    ``x``, ``y`` (and ``colour`` for the hue) is built.  Missing ``order`` /
    ``hue_order`` default to the sorted unique values.  When
    ``self.add_stats`` is set and no ``box_pairs`` were given, every
    unordered pair of categories in ``order`` is compared.

    The automatic pairs are produced with ``itertools.combinations`` over
    the unique categories.  The previous de-duplication key was the two
    labels concatenated as strings, which silently dropped pairs whenever
    two different pairs concatenated to the same text (e.g. ``1, 12`` and
    ``11, 2``) and required the labels to support ``<``.

    Returns:
        The matplotlib Axes that was drawn on.
    """
    from itertools import combinations

    x, y, hue = self.x, self.y, self.hue
    order, hue_order, box_pairs = self.order, self.hue_order, self.box_pairs

    if not isinstance(self.x, str) and not isinstance(self.y, str):
        # Raw values were supplied: wrap them in a frame seaborn can use.
        vis_df = pd.DataFrame()
        vis_df['x'] = x
        vis_df['y'] = y
        x, y = 'x', 'y'
        if self.hue is not None:
            vis_df['colour'] = self.hue
            hue = 'colour'
    else:
        vis_df = self.df

    # set the orders
    if hue_order is None and hue is not None:
        hue_order = sorted(set(vis_df[hue].values))
    if order is None:
        order = sorted(set(vis_df[x].values))

    ax = sns.violinplot(data=vis_df, x=x, y=y, hue=hue, hue_order=hue_order,
                        order=order, palette=self.palette,
                        showfliers=self.showfliers)
    if self.add_dots:
        ax = sns.stripplot(data=vis_df, x=x, y=y, hue_order=hue_order,
                           order=order, alpha=0.9, s=1, color='.2')

    if self.add_stats:
        if box_pairs is None:
            # All unordered pairs, first-listed category first; duplicates
            # in ``order`` are ignored.
            unique_order = list(dict.fromkeys(order))
            box_pairs = list(combinations(unique_order, 2))
        # Add stats annotation
        add_stat_annotation(ax, data=vis_df, x=x, y=y, order=order,
                            box_pairs=box_pairs, test=self.stat_method,
                            text_format='star', loc='inside', verbose=2,
                            pvalue_thresholds=[[1e-4, "****"],
                                               [1e-3, "***"],
                                               [1e-2, "**"],
                                               [0.05, "*"]])

    ax.set_xticklabels(ax.get_xticklabels(), rotation=45,
                       horizontalalignment='right')
    ax.tick_params(labelsize=self.label_font_size)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,
               fontsize=self.label_font_size)
    self.add_labels()
    self.set_ax_params(ax)
    plt.tight_layout()
    return ax
def plot_perspective(ax, path_persp, path_no_persp, parameter_configs_p,
                     parameter_configs_nop):
    """Bar plot of ``Value`` per agent, with and without perspective.

    Data comes from the grid-sweep helpers when ``path_persp`` equals
    ``"results/gridsweep"``, otherwise from ``_prepare_data_perspective``
    for each path.  Within each ``Perspective`` group the ``AE`` agent is
    compared with ``DTI`` and ``AE+MTM`` using Welch t-tests.

    NOTE(review): the no-perspective ``AE+MTM`` values are shifted by
    +0.002 before plotting; the reason is not visible here.

    Args:
        ax: Axes to draw on.
        path_persp: Results path for the perspective runs.
        path_no_persp: Results path for the no-perspective runs.
        parameter_configs_p: Configs for the perspective runs.
        parameter_configs_nop: Configs for the no-perspective runs.
    """
    from_gridsweep = path_persp == "results/gridsweep"
    if from_gridsweep:
        print("Getting data from gridsweep")
        with_persp = _prepare_data_perspective_grid()
        without_persp = _prepare_data_noperspective_grid()
    else:
        print("Getting data NOT from gridsweep")
        with_persp = _prepare_data_perspective(path_persp,
                                               parameter_configs_p)
        without_persp = _prepare_data_perspective(path_no_persp,
                                                  parameter_configs_nop)

    ax.set_ylim((0.5, 1))

    without_persp["Perspective"] = "No"
    is_ae_mtm = without_persp["Agent"] == "AE+MTM"
    without_persp.loc[is_ae_mtm, "Value"] += 0.002
    with_persp["Perspective"] = "Yes"

    data = pd.concat([with_persp, without_persp])
    data.sort_values(by="Agent", inplace=True)
    print(data)

    sns.barplot(
        data=data,
        x="Perspective",
        y="Value",
        hue="Agent",
        edgecolor=".2",
        capsize=0.01,
        errwidth=1.5,
        ax=ax,
    )
    remove_legend_titles(ax)

    # AE against each other agent, separately for "Yes" and "No".
    comparisons = [
        ((persp, "AE"), (persp, rival))
        for persp in ("Yes", "No")
        for rival in ("DTI", "AE+MTM")
    ]
    add_stat_annotation(
        ax,
        line_height=0.02,
        line_offset_to_box=0.04,
        data=data,
        x="Perspective",
        y="Value",
        hue="Agent",
        test="t-test_welch",
        box_pairs=comparisons,
    )
    change_width_(ax, 0.22)
    ax.set_ylabel(r"Accuracy (\%)")
def col_pho_cliplot(gene):
    """Plot each phosphosite of ``gene`` against tumor stage (colon cohort).

    The clinical ``Stage`` column is joined to the phosphoproteomics table
    through ``col.join_metadata_to_omics``; missing stages are labelled
    ``"Normal"``.  For every column whose name contains ``gene`` a new
    figure is drawn (box plot plus points) and ``Normal`` is compared with
    each stage using an independent t-test.

    Sites for which plotting or annotation raises ``ValueError`` are skipped
    with a message.  The handler used to be a bare ``except:`` followed by a
    stray ``ValueError`` expression, which swallowed every exception
    (including ``KeyboardInterrupt``) silently.

    The join is done once; each site works on its own NaN-filtered copy, so
    the result per site is the same as re-joining for every site.

    Args:
        gene: Substring used to select the phosphosite columns.
    """
    stage_table = col.join_metadata_to_omics(
        metadata_df_name="clinical",
        omics_df_name="phosphoproteomics",
        metadata_cols="Stage")
    stage_table["Stage"] = stage_table["Stage"].fillna("Normal")

    site_columns = stage_table.filter(like=gene).columns.values.tolist()
    order = ["Normal", "Stage I", "Stage II", "Stage III", "Stage IV"]
    normal_vs_stage = [("Normal", stage) for stage in order[1:]]

    for site in site_columns:
        print(site)
        try:
            site_table = stage_table.dropna(subset=[site])
            plt.figure()
            sns.set(style="white", font_scale=1.5)
            ax = sns.boxplot(x="Stage", y=site, data=site_table,
                             showfliers=False, order=order)
            sns.stripplot(x="Stage", y=site, data=site_table, color='.3',
                          order=order)
            add_stat_annotation(ax, data=site_table, x="Stage", y=site,
                                order=order, boxPairList=normal_vs_stage,
                                test='t-test_ind', textFormat='star',
                                loc='inside', verbose=2)
            plt.title('colon cancer')
        except ValueError as err:
            # Best effort: one failing site must not stop the others.
            print(f"Skipping {site}: {err}")
def annotate_anova(ax, data, y, anova_path, anova_sheet):
    """Annotate RU vs LL comparisons using p-values stored in a workbook.

    The sheet is read with its first column as index; the column named after
    the first space-separated word of ``y`` supplies one p-value per index
    entry.  Only entries with ``p < 0.05`` are annotated, each as the
    ``(entry, 'RU')`` vs ``(entry, 'LL')`` pair.  No test is run here.

    Args:
        ax: Axes holding the plot to annotate.
        data: DataFrame behind the plot (``EVLP ID`` and ``Location``
            columns plus ``y``).
        y: Plotted column name; its first word selects the p-value column.
        anova_path: Path of the Excel workbook.
        anova_sheet: Sheet name inside the workbook.
    """
    sheet = pd.read_excel(anova_path, sheet_name=anova_sheet, index_col=0)
    p_column = sheet[y.split(' ')[0]]

    significant_pairs = []
    significant_pvalues = []
    for sample_id in p_column.index:
        p_value = p_column[sample_id]
        if not p_value < 0.05:
            continue
        significant_pvalues.append(p_value)
        significant_pairs.append(((sample_id, 'RU'), (sample_id, 'LL')))

    add_stat_annotation(
        ax,
        data=data,
        x='EVLP ID',
        y=y,
        hue='Location',
        box_pairs=significant_pairs,
        pvalues=significant_pvalues,
        perform_stat_test=False,
        loc='outside',
        verbose=0,
    )
def ovcliplot(gene):
    """Plot each phosphosite of ``gene`` against FIGO stage (ovarian cohort).

    Clinical metadata is joined to the phosphoproteomics of ``gene`` through
    ``ov.join_metadata_to_omics``; missing ``Tumor_Stage_Ovary_FIGO`` values
    are labelled ``"Normal"``.  One figure per matching column is drawn and
    ``Normal`` is compared with each stage using an independent t-test.
    The table is re-joined for every site, and rows with a missing site
    value are not dropped.

    Args:
        gene: Gene passed to the join and used to select the site columns.
    """
    stage_col = "Tumor_Stage_Ovary_FIGO"

    def _load():
        table = ov.join_metadata_to_omics(
            metadata_df_name="clinical",
            omics_df_name="phosphoproteomics",
            omics_genes=gene)
        table[stage_col] = table[stage_col].fillna("Normal")
        return table

    joined = _load()
    # Notebook-style inspection calls; their results are discarded.
    joined.head()
    joined[stage_col].unique()

    site_columns = list(joined.filter(like=gene).columns.values.tolist())
    for site in site_columns:
        print(site)
        joined = _load()
        plt.figure()
        sns.set_style("white")
        order = ["Normal", "IIIA", "IIIB", "IIIC", "IV"]
        ax = sns.boxplot(x=stage_col, y=site, data=joined, showfliers=False,
                         order=order)
        sns.stripplot(x=stage_col, y=site, data=joined, color='.3',
                      order=order)
        add_stat_annotation(
            ax,
            data=joined,
            x=stage_col,
            y=site,
            order=order,
            boxPairList=[("Normal", "IIIA"), ("Normal", "IIIB"),
                         ("Normal", "IIIC"), ("Normal", "IV")],
            test='t-test_ind',
            textFormat='star',
            loc='inside',
            verbose=2,
        )
        plt.title('ovarian cancer')
def add_annotation(ax, results_df, all_pairs, metric, box_pairs):
    """Add annotation for pairwise statistical tests to box plots.

    All pairwise comparisons are computed by ``_pairwise_compare`` (per the
    original comments: rank-based tests with Bonferroni correction --
    confirm in that helper).  Only the corrected p-values of ``box_pairs``
    are drawn, as precomputed values with star formatting.

    Args:
        ax: Axes holding the box plot.
        results_df: DataFrame with ``training_data``, ``delta_mean`` and
            ``gene`` columns.
        all_pairs: Passed to ``_pairwise_compare`` and used as the x-axis
            order of the annotation.
        metric: Passed to ``_pairwise_compare``.
        box_pairs: ``(data_type_1, data_type_2)`` pairs to display.

    Returns:
        The DataFrame returned by ``_pairwise_compare``.
    """
    from statannot import add_stat_annotation

    pairwise_tests_df = _pairwise_compare(results_df, all_pairs, metric)

    # Corrected p-values of the displayed pairs, in ``box_pairs`` order.
    indexed_tests = pairwise_tests_df.set_index(
        ['data_type_1', 'data_type_2'])
    box_pvals = indexed_tests.loc[box_pairs, :].corr_pval.values

    add_stat_annotation(ax,
                        data=results_df.sort_values(by='gene'),
                        x='training_data',
                        y='delta_mean',
                        order=all_pairs,
                        box_pairs=box_pairs,
                        perform_stat_test=False,
                        pvalues=box_pvals,
                        pvalue_thresholds=[(1e-3, '***'), (1e-2, '**'),
                                           (0.05, '*'), (1, 'ns')],
                        text_format='star',
                        loc='inside',
                        verbose=0,
                        fontsize=16)
    return pairwise_tests_df
def sns_violinplot(dd, my_pal, figname, plot_xlabels, x="Gene", y='value',
                   hue=None, no_legend=True, rotation=0, annot=False):
    """Save a styled violin plot of ``dd`` as a 600 dpi PNG.

    With ``annot=True`` consecutive genes of
    ``constants.analysis_config['MRNA_GENES']`` are compared pairwise
    (1st vs 2nd, 3rd vs 4th, ...; a trailing unpaired gene is ignored) with
    independent t-tests.

    The legend is removed only if one exists.  Previously
    ``ax.legend_.remove()`` raised ``AttributeError`` whenever the plot had
    no legend, which is the case with the default ``hue=None``.

    Args:
        dd: Long-form DataFrame to plot.
        my_pal: Palette passed to seaborn.
        figname: Output file name.
        plot_xlabels: Tick labels for the x axis.
        x: Column for the x axis.
        y: Column for the y axis.
        hue: Optional grouping column.
        no_legend: Remove the legend when True.
        rotation: Rotation of the x tick labels.
        annot: Add the t-test annotations when True.
    """
    fig = plt.figure(figsize=(10, 10))
    ax = sns.violinplot(x=x, y=y, data=dd, hue=hue, palette=my_pal)

    gene_list = constants.analysis_config['MRNA_GENES']
    if annot:
        box_pairs = [(gene_list[k], gene_list[k + 1])
                     for k in range(0, len(gene_list) - 1, 2)]
        add_stat_annotation(ax, data=dd, x=x, y=y, hue=hue,
                            box_pairs=box_pairs, test='t-test_ind',
                            text_format='star', loc='inside', verbose=2)

    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.yaxis.grid(which="major", color='black', linestyle='-',
                  linewidth=0.25)
    ax.tick_params(right=False, top=False, direction='out', length=8,
                   width=3, colors='black')
    ax.spines['left'].set_linewidth(3)
    ax.set_xticklabels(plot_xlabels, rotation=rotation)
    plt.yticks(fontsize=30)
    plt.xticks(fontsize=20)
    plt.gcf().subplots_adjust(bottom=0.2, left=0.2)

    # seaborn creates no legend without a hue, so ``legend_`` may be None.
    if no_legend and ax.legend_ is not None:
        ax.legend_.remove()

    fig.savefig(figname, format='png', dpi=600)
    # Close the figure created here, not whichever one is current.
    plt.close(fig)
def plot_param_diff_acc_status(esm_res, output_dir):
    """Save a 1x3 grid comparing ESM parameters by accumulator status.

    Only rows with ``Mutation == 1`` are used.  Each panel shows a box plot
    with jittered points for ``Accumulator`` "No" vs "Yes" and an
    independent t-test annotation.  The figure is written to
    ``<output_dir>/param_diff_acc_status.png`` and then closed.

    Args:
        esm_res: DataFrame with ``Mutation``, ``Accumulator`` and the three
            plotted parameter columns.
        output_dir: Directory that receives the PNG.
    """
    sns.set_style("whitegrid", {'axes.grid': False})

    # Long axis labels; defined but currently not drawn on the figure.
    yaxis_labels = [
        "Deltas (Clearance Parameter)", "Betas (Production Parameter)",
        "Beta Delta Ratio (Log)"
    ]
    panels = [
        ("DELTAS_est", "Clearance"),
        ("BETAS_est", "Production"),
        ("BDR_log", "Production/Clearance"),
    ]
    point_pal = {"No": "mediumblue", "Yes": "red"}
    face_pal = {"No": "cornflowerblue", "Yes": "indianred"}
    group_col = "Accumulator"
    carriers = esm_res[esm_res.Mutation == 1]

    plt.figure(figsize=(19, 7))
    for position, (param, title) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        panel_ax = sns.boxplot(data=carriers, x=group_col, y=param,
                               palette=face_pal, fliersize=0)
        sns.stripplot(x=group_col, y=param, data=carriers, jitter=True,
                      dodge=True, linewidth=0.5, palette=point_pal)
        panel_ax.set_xticklabels(["Non-accumulator", "Accumulator"],
                                 fontsize=24)
        add_stat_annotation(panel_ax, data=carriers, x=group_col, y=param,
                            box_pairs=[("No", "Yes")], test='t-test_ind',
                            text_format='star', loc='inside', verbose=2,
                            fontsize=18)
        plt.xlabel("", fontsize=24)
        plt.ylabel("", fontsize=18)
        plt.title(title, fontsize=24)
        plt.rc('xtick', labelsize=24)
        plt.rc('ytick', labelsize=24)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "param_diff_acc_status.png"))
    plt.close()
def ov_pho_cli_plot(gene):
    """Plot each phosphosite of ``gene``, normal vs tumor (ovarian cohort).

    The ``Sample_Tumor_Normal`` clinical column is joined to the
    phosphoproteomics of ``gene`` through ``ov.join_metadata_to_omics`` and
    duplicated column names are dropped (first occurrence kept).  For every
    matching column the table is re-joined, rows with a missing value for
    that site are removed, and a box plot with points is drawn with an
    independent t-test between ``Normal`` and ``Tumor``.

    Args:
        gene: Gene passed to the join and used to select the site columns.
    """
    group_col = "Sample_Tumor_Normal"

    def _load():
        table = ov.join_metadata_to_omics(
            metadata_df_name="clinical",
            omics_df_name="phosphoproteomics",
            metadata_cols=group_col,
            omics_genes=gene)
        return table.loc[:, ~table.columns.duplicated()]

    joined = _load()
    # Notebook-style inspection calls; their results are discarded.
    joined.head()
    joined[group_col].unique()

    site_columns = list(joined.filter(like=gene).columns.values.tolist())
    for site in site_columns:
        print(site)
        joined = _load().dropna(subset=[site])
        plt.figure()
        sns.set_style("white")
        order = ["Normal", "Tumor"]
        ax = sns.boxplot(x=group_col, y=site, data=joined, showfliers=False,
                         order=order)
        sns.stripplot(x=group_col, y=site, data=joined, color='.3',
                      order=order)
        add_stat_annotation(
            ax,
            data=joined,
            x=group_col,
            y=site,
            order=order,
            boxPairList=[("Normal", "Tumor")],
            test='t-test_ind',
            textFormat='star',
            loc='inside',
            verbose=2,
        )
        plt.title('ovarian cancer')
def analyze_whole_chr_anp_frac_samplewise_all(wgd_plus_fracs,
                                              wgd_minus_fracs):
    """Compare ``wca_frac`` between WGD+ and WGD- samples and show the plot.

    Both frames are stacked (WGD+ rows first), tagged with a ``wgd_status``
    column, written to ``Type_wise_df_wca_frac.tsv`` inside the module-level
    ``src_folder``, and shown as a box plot annotated with an independent
    t-test.

    Args:
        wgd_plus_fracs: DataFrame of WGD+ samples with a ``wca_frac`` column.
        wgd_minus_fracs: DataFrame of WGD- samples with a ``wca_frac``
            column.
    """
    n_plus = len(wgd_plus_fracs.index)
    n_minus = len(wgd_minus_fracs.index)
    status_labels = ["WGD+"] * n_plus + ["WGD-"] * n_minus

    print(wgd_plus_fracs.head())
    print(wgd_minus_fracs.head())

    combined = pd.concat([wgd_plus_fracs, wgd_minus_fracs], axis=0,
                         ignore_index=True)
    combined["wgd_status"] = status_labels
    print(combined.head())

    tsv_path = os.path.join(src_folder, "Type_wise_df_wca_frac.tsv")
    combined.to_csv(tsv_path, sep="\t", index=0)

    plt.figure(figsize=(30, 10))
    ax = sns.boxplot(
        x="wgd_status",
        y="wca_frac",
        data=combined,
        hue="wgd_status",
        palette="Set1",
        showfliers=False,
    )
    ax.set_ylim((0, 1.1))
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

    add_stat_annotation(
        ax,
        x="wgd_status",
        y="wca_frac",
        data=combined,
        box_pairs=[("WGD+", "WGD-")],
        test="t-test_ind",
        text_format="star",
        loc="inside",
        verbose=0,
    )
    plt.show()
def add_annot(data, gene_list, ax, test):
    """Annotate ``ax`` with tests between consecutive genes of ``gene_list``.

    ``data`` (a mapping of gene name to values) is reshaped into a long
    frame with ``gene`` and ``value`` columns, dropping missing values.
    Genes are paired as 1st vs 2nd, 3rd vs 4th, ...; a trailing unpaired
    gene is ignored.

    Args:
        data: Mapping from gene name to a sequence of values.
        gene_list: Gene names in plotting order.
        ax: Axes holding the plot to annotate.
        test: Name of the statistical test, forwarded to
            ``add_stat_annotation``.
    """
    columns = {name: pd.Series(values) for name, values in data.items()}
    long_frame = (
        pd.DataFrame(columns)
        .melt()
        .dropna()
        .rename(columns={"variable": "gene"})
    )

    box_pairs = [
        (gene_list[start], gene_list[start + 1])
        for start in range(0, len(gene_list) - 1, 2)
    ]

    add_stat_annotation(
        ax,
        data=long_frame,
        x='gene',
        y='value',
        hue=None,
        box_pairs=box_pairs,
        test=test,
        text_format='star',
        loc='inside',
        verbose=2,
    )
def barplots(alphas, betas, thetas):
    """Bar plots of theta/alpha/beta values for slow vs fast states.

    Each argument holds two groups: index 0 is the ``slow`` state and index
    1 the ``fast`` state.  A Welch t-test is run per band and the p-values
    are Bonferroni-corrected for the three comparisons.

    The corrected p-values are capped at 1.  Previously ``p * 3`` could
    exceed 1, which is not a valid p-value and lies outside the thresholds
    the star annotation maps to a label.

    NOTE(review): the groups are concatenated with ``+``, so they are
    assumed to be lists (numpy arrays would be added element-wise) --
    confirm against the callers.

    Args:
        alphas: ``[slow_values, fast_values]`` for the alpha band.
        betas: ``[slow_values, fast_values]`` for the beta band.
        thetas: ``[slow_values, fast_values]`` for the theta band.

    Returns:
        numpy array of corrected p-values in the order theta, alpha, beta.
    """
    sns.set(style='white', font_scale=2)

    # Test order matches the plotting order below.
    bands = [thetas, alphas, betas]
    raw_p = [stats.ttest_ind(band[0], band[1], equal_var=False)[1]
             for band in bands]
    p_values = np.minimum(np.array(raw_p) * len(raw_p), 1.0)

    dataframe = pd.DataFrame()
    dataframe['States'] = (['slow'] * len(alphas[0])
                           + ['fast'] * len(alphas[1]))
    dataframe['Alpha'] = alphas[0] + alphas[1]
    dataframe['Theta'] = thetas[0] + thetas[1]
    dataframe['Beta'] = betas[0] + betas[1]

    fig, ax = plt.subplots(1, 3, figsize=(17, 20), squeeze=False)
    for col_idx, frequency in enumerate(['Theta', 'Alpha', 'Beta']):
        panel = ax[0, col_idx]
        sns.barplot(ax=panel, y=frequency, x='States', data=dataframe,
                    capsize=0.1).set(xlabel='', ylabel=frequency)
        add_stat_annotation(panel, y=frequency, x='States', data=dataframe,
                            box_pairs=[('slow', 'fast')],
                            perform_stat_test=False,
                            pvalues=[p_values[col_idx]],
                            text_format='star', loc='outside', verbose=2,
                            comparisons_correction=None, line_offset=0.02,
                            text_offset=0.01)

    fig.subplots_adjust(wspace=0.38, hspace=0.62)
    fig.show()
    return p_values
def en_phos_cli_plot(gene):
    """Plot each phosphosite of ``gene`` against stage (endometrial cohort).

    The ``tumor_Stage-Pathological`` clinical column is joined to the
    gene-level phosphoproteomics of ``gene`` through
    ``en.join_metadata_to_omics``; missing stages are labelled ``"Normal"``.
    For every column whose name contains ``gene`` a new figure is drawn
    (box plot plus points) and ``Normal`` is compared with each stage using
    an independent t-test.

    Rows with a missing value are dropped per site only.  Previously the
    filtered frame replaced the shared one inside the loop, so every site
    lost the rows that were missing for any earlier site.

    Args:
        gene: Gene passed to the join and used to select the site columns.
    """
    stage_col = "tumor_Stage-Pathological"

    joined = en.join_metadata_to_omics(
        metadata_df_name="clinical",
        omics_df_name="phosphoproteomics_gene",
        metadata_cols=stage_col,
        omics_genes=gene)
    joined[stage_col] = joined[stage_col].fillna("Normal")

    site_columns = joined.filter(like=gene).columns.values.tolist()
    order = ["Normal", "Stage I", "Stage II", "Stage III", "Stage IV"]
    normal_vs_stage = [("Normal", stage) for stage in order[1:]]

    for site in site_columns:
        print(site)
        # Filter a per-site copy; ``joined`` itself stays complete.
        site_table = joined.dropna(subset=[site])
        plt.figure()
        sns.set(style="white", font_scale=1.5)
        ax = sns.boxplot(x=stage_col, y=site, data=site_table,
                         showfliers=False, order=order)
        sns.stripplot(x=stage_col, y=site, data=site_table, color='.3',
                      order=order)
        add_stat_annotation(ax, data=site_table, x=stage_col, y=site,
                            order=order, boxPairList=normal_vs_stage,
                            test='t-test_ind', textFormat='star',
                            loc='inside', verbose=2)
        plt.title('endometrial cancer')
def plot_allele_frequency(df: pd.DataFrame, fname: str,
                          af_col: str = "gnomADg_AF"):
    """
    Plots allele frequencies for each class

    A ``grouper`` column (``"<outcome>\\nN = <count_class>"``) is added to
    ``df`` in place and used as the x axis.  All groups together form one
    comparison tuple for the Mann-Whitney annotation; if annotating or
    saving raises ``ValueError`` the figure is closed and nothing is
    written.

    :param pd.DataFrame df: Input df with ``outcome`` and ``count_class``
        columns
    :param str fname: Output basename; ``.pdf`` is appended
    :param str af_col: Column name that accounts for allele frequencies.
        Default: `gnomADg_AF`. If the column does not exist, analysis
        will be skipped.
    """
    if af_col not in df.columns:
        return

    outcome_text = df['outcome'].astype(str)
    count_text = df['count_class'].astype(str)
    df['grouper'] = outcome_text + '\nN = ' + count_text

    group_order = sorted(df['grouper'].unique())
    ax = sns.boxplot(data=df, x="grouper", order=group_order, y=af_col)
    try:
        add_stat_annotation(
            ax,
            data=df,
            x="grouper",
            y=af_col,
            order=group_order,
            box_pairs=[tuple(group_order)],
            test='Mann-Whitney',
            text_format='star',
            loc='inside',
            verbose=0,
            pvalue_format_string='{:.4f}',
        )
        plt.xlabel("")
        plt.ylabel("Allele frequency")
        plt.tight_layout()
        plt.savefig(fname + '.pdf')
        plt.close()
    except ValueError:
        # Best effort: drop the figure and skip the output file.
        plt.close()
def boxes(self):
    """Save one box plot per measure comparing the experiments.

    Reads ``<dir><anal>_full_data.csv``, keeps the rows of
    ``self.experiments`` at the highest ``episode`` value, and for every
    entry of ``self.measures`` writes ``<dir><anal>_<measure>_box.png``.
    When there are at least two experiments, all experiment pairs are
    annotated with Wilcoxon tests.
    """
    plt.clf()
    experiment_pairs = list(combinations(self.experiments, 2))

    frame = pd.read_csv(f'{self.dir}{self.anal}_full_data.csv')
    frame = frame[frame['experiment'].isin(self.experiments)]
    last_episode = frame["episode"].max()
    frame = frame[frame['episode'] == last_episode]
    print(frame)

    for measure in self.measures:
        sb.set()
        sb.set_style("whitegrid")
        box_ax = sb.boxplot(x='experiment', y=measure, data=frame,
                            palette=self.clrs)

        # Open question from the original author: remove the Bonferroni
        # correction?
        if experiment_pairs:
            add_stat_annotation(
                box_ax,
                data=frame,
                x='experiment',
                y=measure,
                box_pairs=experiment_pairs,
                test='Wilcoxon',
                text_format='star',
                loc='inside',
                verbose=2,
            )

        plt.title(self.anal)
        target = f'{self.dir}{self.anal}_{measure}_box.png'
        box_ax.get_figure().savefig(target)
        plt.clf()
def col_tra_cli_plot(gene):
    """Box/strip plot of ``gene`` transcript levels per stage (colon cohort).

    The clinical ``Stage`` column is joined to the transcriptomics of
    ``gene`` through ``col.join_metadata_to_omics``; missing stages are
    labelled ``"Normal"``.  ``Normal`` is compared with each stage using an
    independent t-test.

    Args:
        gene: Gene name; the plotted column is ``gene + '_transcriptomics'``.
    """
    joined = col.join_metadata_to_omics(
        metadata_df_name="clinical",
        omics_df_name="transcriptomics",
        metadata_cols="Stage",
        omics_genes=gene,
    )
    joined["Stage"] = joined["Stage"].fillna("Normal")
    # Notebook-style inspection calls; their results are discarded.
    joined.head()
    joined["Stage"].unique()

    sns.set(style="white", font_scale=1.5)
    order = ["Normal", "Stage I", "Stage II", "Stage III", "Stage IV"]
    value_col = gene + '_transcriptomics'

    ax = sns.boxplot(x="Stage", y=value_col, data=joined, showfliers=False,
                     order=order)
    ax = sns.stripplot(x="Stage", y=value_col, data=joined, color='.3',
                       order=order)
    add_stat_annotation(
        ax,
        data=joined,
        x="Stage",
        y=value_col,
        order=order,
        boxPairList=[("Normal", "Stage I"), ("Normal", "Stage II"),
                     ("Normal", "Stage III"), ("Normal", "Stage IV")],
        test='t-test_ind',
        textFormat='star',
        loc='inside',
        verbose=2,
    )
    plt.title('colon cancer')
def plot_agreement(ax, path):
    """Bar plot of ``Agreement`` per agent with a Welch t-test annotation.

    The data prepared by ``_prepare_data_agreement`` is sorted by ``Agent``
    and the ``DTI`` vs ``AE+MTM`` comparison is annotated on ``ax``.

    Args:
        ax: Axes to draw on.
        path: Forwarded to ``_prepare_data_agreement``.
    """
    data = _prepare_data_agreement(path)
    data.sort_values(by="Agent", inplace=True)

    # Arguments shared by the plot and its annotation.
    shared = dict(data=data, x="Agent", y="Agreement")

    sns.barplot(ax=ax, edgecolor=".2", capsize=0.01, errwidth=1.5, **shared)
    add_stat_annotation(
        ax,
        test="t-test_welch",
        line_height=0.02,
        line_offset_to_box=0.04,
        box_pairs=[("DTI", "AE+MTM")],
        **shared,
    )