def alpha_frequency(output_dir: str, tables: biom.Table, metadata_column: str = None, metadata: qiime2.Metadata = None, palette: str = 'husl', style: str = 'white', context: str = 'paper', # plot_type: str = 'all', labels : str = None) -> None: #first 2 tables sample_frequencies1 = _frequencies(tables[0], axis='sample') sample_frequencies2 = _frequencies(tables[1], axis='sample') sample_frequencies1.sort_values(inplace=True, ascending=False) sample_frequencies2.sort_values(inplace=True, ascending=False) sample_frequencies_df1 = sample_frequencies1.to_frame() sample_frequencies_df2 = sample_frequencies2.to_frame() sample_frequencies_df1.index.name = "sample-id" sample_frequencies_df1.reset_index(inplace=True) sample_frequencies_df2.index.name = "sample-id" sample_frequencies_df2.reset_index(inplace=True) #if not metadata & labels: # raise ValueError("Metadata file was not provided") #if no labels are given, label the inputs as numbers if not labels: merged = pd.merge(sample_frequencies_df1, sample_frequencies_df2, on = "sample-id") merged = merged.rename(columns = {'0_x':'1', '0_y':'2'}) vars_to_plot = ['1','2'] #if verbose print('Labeling columns ...') if len(tables)>2: for i in range((len(tables))-2) : sample_frequencies = _frequencies(tables[i+2], axis='sample') sample_frequencies.sort_values(inplace=True, ascending=False) sample_frequencies_df = sample_frequencies.to_frame() sample_frequencies_df.index.name = "sample-id" sample_frequencies_df.reset_index(inplace=True) merged = pd.merge(merged, sample_frequencies_df, on = "sample-id") merged = merged.rename(columns = {0:(i+3)}) vars_to_plot = list(merged.loc[:, merged.columns !='sample-id']) else: if len(labels) != len(tables): raise ValueError("The number of labels is different than the number of tables") merged = pd.merge(sample_frequencies_df1, sample_frequencies_df2, on = "sample-id") merged = merged.rename(columns = {'0_x':labels[0], '0_y':labels[1]}) vars_to_plot = list(merged.loc[:, merged.columns !='sample-id']) # if verbose: print('Labeling columns ...') if len(tables)>2: for i in range((len(tables))-2) : sample_frequencies = _frequencies(tables[i+2], axis='sample') sample_frequencies.sort_values(inplace=True, ascending=False) sample_frequencies_df = sample_frequencies.to_frame() sample_frequencies_df.index.name = "sample-id" sample_frequencies_df.reset_index(inplace=True) merged = pd.merge(merged, sample_frequencies_df, on = "sample-id") merged = merged.rename(columns = {0:labels[i+2]}) vars_to_plot = list(merged.loc[:, merged.columns !='sample-id']) melted_merged = pd.melt(merged, id_vars = 'sample-id') if not metadata: melted_merged = pd.melt(merged, id_vars = 'sample-id') melted_merged = melted_merged.rename(columns = {'variable':'Table', 'value':'Sequencing Depth'}) table_preview = melted_merged.to_html() with open('melted_merged.html', 'w') as file: file.write(table_preview) table_preview2 = merged.to_html() with open('merged.html', 'w') as file: file.write(table_preview2) sns.set_style(style) sns.set_context(context) pairplot_frequency = sns.pairplot(merged, vars = vars_to_plot, palette = palette) pairplot_frequency.savefig(os.path.join(output_dir, 'pairplot_frequency.png')) pairplot_frequency.savefig(os.path.join(output_dir, 'pairplot_frequency.pdf')) plt.gcf().clear() print('Plotting pairplot ...') #raincloud_frequency = pt.RainCloud( x = 'Table', y = 'Sequencing Depth', data = melted_merged, # orient = 'h', alpha = 0.65, palette = palette ) #raincloud_frequency.figure.savefig(os.path.join(output_dir, 'raincloud.png'), bbox_inches = 'tight') #raincloud_frequency.figure.savefig(os.path.join(output_dir, 'raincloud.pdf'), bbox_inches = 'tight') plt.gcf().clear() print('Plotting raincloud ...') boxplot_frequency = sns.boxplot(data=melted_merged,x='Table',y='Sequencing Depth', palette = palette, saturation = 1) boxplot_frequency.figure.savefig(os.path.join(output_dir, 'boxplot.png'), bbox_inches = 'tight') boxplot_frequency.figure.savefig(os.path.join(output_dir, 'boxplot.pdf'), bbox_inches = 'tight') plt.gcf().clear() print('Plotting boxplot ...') else: if not metadata_column: raise ValueError("Metadata column not provided") metadata = metadata.to_dataframe() metadata.index.name = "sample-id" metadata.reset_index(inplace = True) merged_metadata = pd.merge(merged,metadata, on = "sample-id") melted_merged_metadata = pd.merge(melted_merged, metadata, on = "sample-id") melted_merged_metadata = melted_merged_metadata.rename(columns = {'variable':'Table', 'value':'Sequencing Depth'}) # if verbose: print('Merging metadata ...') sns.set_style(style) sns.set_context(context) pairplot_frequency = sns.pairplot(merged_metadata, hue = metadata_column, vars = vars_to_plot, palette = palette) pairplot_frequency.savefig(os.path.join(output_dir, 'pairplot_frequency.png')) pairplot_frequency.savefig(os.path.join(output_dir, 'pairplot_frequency.pdf')) plt.gcf().clear() # if verbose: print('Plotting pairplot ...') #raincloud_frequency = pt.RainCloud( x = 'Table', y = 'Sequencing Depth', data = melted_merged_metadata, # orient = 'h', hue = metadata_column, alpha = 0.65, palette = palette ) #raincloud_frequency.figure.savefig(os.path.join(output_dir, 'raincloud.png'), bbox_inches = 'tight') #raincloud_frequency.figure.savefig(os.path.join(output_dir, 'raincloud.pdf'), bbox_inches = 'tight') #plt.gcf().clear() # if verbose: print('Plotting raincloud ...') boxplot_frequency = sns.boxplot(data=melted_merged_metadata,x='Table',y='Sequencing Depth',hue=metadata_column, palette = palette, saturation = 1) boxplot_frequency.figure.savefig(os.path.join(output_dir, 'boxplot.png'), bbox_inches = 'tight') boxplot_frequency.figure.savefig(os.path.join(output_dir, 'boxplot.pdf'), bbox_inches = 'tight') plt.gcf().clear() # if verbose: print('Plotting boxplot ...') # melted_merged.to_numpy() # for i in range(len(merged.columns)-1): # col = index = os.path.join(TEMPLATES, 'frequency_assets', 'index.html') q2templates.render(index, output_dir)
def denoise_stats( output_dir: str, stats: qiime2. Metadata, #stats type is not a metadata but this is the transformer used by DADA2 plugin to make DADA2Stats into pd.dataframe plot_type: str = 'line', labels: str = None, style: str = 'whitegrid', context: str = 'talk') -> None: if not labels: stats = load_df(stats) else: stats = load_df_labels(stats, labels) stats = pd.concat(stats) numeric = ['denoised', 'filtered', 'input', 'non-chimeric'] stats[numeric] = stats[numeric].apply(pd.to_numeric) #makes into a df stats = stats.groupby('id').sum() stats = stats.drop(columns=[ 'percentage of input passed filter', 'percentage of input non-chimeric' ]) df = pd.melt(stats.reset_index(), id_vars='id', var_name='step', value_name='read_number') input_read_number = df['read_number'].max() df['% of Reads Remaining'] = df['read_number'] / input_read_number * 100 step_order = {'input': 0, 'filtered': 1, 'denoised': 2, 'non-chimeric': 3} df['order'] = df['step'].apply(lambda x: step_order[x]) df = df.reset_index() df['Run Number'] = 'Run ' + df['id'] hue_order = df.query('step == "non-chimeric"').sort_values( '% of Reads Remaining', ascending=False)['id'] sns.set_style('whitegrid') sns.set_context("talk") line_graph = sns.lineplot(data=df, y='% of Reads Remaining', x='order', hue='Run Number') plt.ylim(0, 100) plt.xlim(0, 4) plt.xticks([x / 2 for x in range(0, 9)], ['Input', '', 'Filtered', '', 'Denoised', "", 'Non-chimeric']) plt.xlabel('Processing Steps') plt.legend(loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1) # plt.title('allow to give any title or default one') line_graph.figure.savefig(os.path.join(output_dir, 'line_graph.png'), bbox_inches='tight') line_graph.figure.savefig(os.path.join(output_dir, 'line_graph.pdf'), bbox_inches='tight') plt.gcf().clear() #maybe the bargraph r = range(len(stats)) vars_to_plot = df['id'].values print(df.shape) colors = ['darkorange', 'orange', 'sandybrown', 'navajowhite'] Step = ['Input', 'Filtered', 'Denoised', 'Non-chimeric'] plt.bar(r, df[df['step'] == 'input']['read_number'], color=colors[0], edgecolor='white', width=1) plt.bar(r, df[df['step'] == 'filtered']['read_number'], color=colors[1], edgecolor='white', width=1) plt.bar(r, df[df['step'] == 'denoised']['read_number'], color=colors[2], edgecolor='white', width=1) #plt.bar(r, df[df['step']=='merged']['read_number'], color = colors[3], edgecolor = 'white', width = 1) plt.bar(r, df[df['step'] == 'non-chimeric']['read_number'], color=colors[3], edgecolor='white', width=1) plt.xticks(r, vars_to_plot, fontweight='bold') plt.xlabel('Method') plt.ylabel('Sequencing Depth') plt.legend(Step, bbox_to_anchor=(1.05, 1), loc=2) plt.savefig(os.path.join(output_dir, 'bar_plot.png'), bbox_inches='tight') plt.savefig(os.path.join(output_dir, 'bar_plot.pdf'), bbox_inches='tight') plt.gcf().clear() index = os.path.join(TEMPLATES, 'denoise_assets', 'index.html') q2templates.render(index, output_dir)
def alpha_diversity(output_dir: str, alpha_diversity: pd.Series, metadata_column: str = None, metadata: qiime2.Metadata = None, palette: str = 'husl', style: str = 'white', context: str = 'paper', labels : str = None ) -> None: #first 2 vectors alpha_div1 = alpha_diversity[0].to_frame() alpha_div2 = alpha_diversity[1].to_frame() alpha_div1.index.name = "sample-id" alpha_div1.reset_index(inplace=True) alpha_div2.index.name = "sample-id" alpha_div2.reset_index(inplace=True) if not labels: merged = pd.merge(alpha_div1, alpha_div2, on = 'sample-id') merged = merged.rename(columns = {'shannon_x':'1', 'shannon_y':'2'}) vars_to_plot = ['1', '2'] print ('Labeling columns...') if len(alpha_diversity) >2: for i in range((len(alpha_diversity))-2) : alpha_div = alpha_diversity[i+2].to_frame() alpha_div.index.name = "sample-id" alpha_div.reset_index(inplace = True) merged = pd.merge(merged, alpha_div, on = "sample-id") merged = merged.rename(columns = {'shannon':(i+3)}) vars_to_plot = list(merged.loc[:, merged.columns !='sample-id']) else: if len(labels) != len(alpha_diversity): raise ValueError("The number of labels is different than the number of tables") merged = pd.merge(alpha_div1, alpha_div2, on = "sample-id") merged = merged.rename(columns = {'shannon_x':labels[0], 'shannon_y':labels[1]}) vars_to_plot = list(merged.loc[:, merged.columns !='sample-id']) print('Labeling columns ...') if len(alpha_diversity) >2: for i in range((len(alpha_diversity))-2) : alpha_div = alpha_diversity[i+2].to_frame() alpha_div.index.name = "sample-id" alpha_div.reset_index(inplace = True) merged = pd.merge(merged, alpha_div, on = "sample-id") merged = merged.rename(columns = {'shannon':labels[i+2]}) vars_to_plot = list(merged.loc[:, merged.columns !='sample-id']) melted_merged = pd.melt(merged, id_vars = 'sample-id') if not metadata: melted_merged = pd.melt(merged, id_vars = 'sample-id') melted_merged = melted_merged.rename(columns = {'variable':'Table', 'value':'Alpha Diversity Index'}) print('Plotting pairplot ...') sns.set_style(style) sns.set_context(context) pairplot_diversity = sns.pairplot(merged, vars = vars_to_plot, palette = palette) pairplot_diversity.savefig(os.path.join(output_dir, 'pairplot_diversity.png')) pairplot_diversity.savefig(os.path.join(output_dir, 'pairplot_diversity.pdf')) plt.gcf().clear() print('Plotting raincloud ...') #raincloud_diversity = pt.RainCloud(x = 'Table', y = 'Alpha Diversity Index', data = melted_merged, #orient = 'h', alpha = 0.65, palette = palette) #raincloud_diversity.figure.savefig(os.path.join(output_dir, 'raincloud_diversity.png'), bbox_inches = 'tight') #raincloud_diversity.figure.savefig(os.path.join(output_dir, 'raincloud_diversity.pdf'), bbox_inches = 'tight') #plt.gcf().clear() print('Plotting boxplot ...') boxplot_diversity = sns.boxplot(data = melted_merged, x= 'Table', y= 'Alpha Diversity Index', palette = palette, saturation = 1) boxplot_diversity.figure.savefig(os.path.join(output_dir, 'boxplot_diversity.png'), bbox_inches = 'tight') boxplot_diversity.figure.savefig(os.path.join(output_dir, 'boxplot_diversity.pdf'), bbox_inches = 'tight') plt.gcf().clear() else: if not metadata_column: raise ValueError("Metadata column not provided") print('Merging metadata ...') metadata = metadata.to_dataframe() metadata.index.name = "sample-id" metadata.reset_index(inplace = True) merged_metadata = pd.merge(merged, metadata, on = "sample-id") melted_merged_metadata = pd.merge(melted_merged, metadata, on = "sample-id") melted_merged_metadata = melted_merged_metadata.rename(columns = {'variable':'Table', 'value':'Alpha Diversity Index'}) print('Plotting pairplot ...') sns.set_style(style) sns.set_context(context) pairplot_diversity = sns.pairplot(merged_metadata, hue = metadata_column, vars = vars_to_plot, palette = palette) pairplot_diversity.savefig(os.path.join(output_dir, 'pairplot_diversity.png')) pairplot_diversity.savefig(os.path.join(output_dir, 'pairplot_diversity.pdf')) plt.gcf().clear() print('Plotting raincloud ...') #raincloud_diversity = pt.RainCloud( x = 'Table', y = 'Alpha Diversity Index', data = melted_merged_metadata, # orient = 'h', hue = metadata_column, alpha = 0.65, palette = palette ) #raincloud_diversity.figure.savefig(os.path.join(output_dir, 'raincloud_diversity.png'), bbox_inches = 'tight') #raincloud_diversity.figure.savefig(os.path.join(output_dir, 'raincloud_diversity.pdf'), bbox_inches = 'tight') #plt.gcf().clear() print('Plotting boxplot ...') boxplot_diversity = sns.boxplot(data=melted_merged_metadata,x='Table',y='Alpha Diversity Index',hue=metadata_column, palette = palette, saturation = 1) boxplot_diversity.figure.savefig(os.path.join(output_dir, 'boxplot_diversity.png'), bbox_inches = 'tight') boxplot_diversity.figure.savefig(os.path.join(output_dir, 'boxplot_diversity.pdf'), bbox_inches = 'tight') plt.gcf().clear() index = os.path.join(TEMPLATES, 'diversity_assets', 'index.html') q2templates.render(index, output_dir)