def n50_barplot(df, figformat, path, title=None, palette=None):
    '''
    Create a bar chart comparing the read length N50 of every dataset.

    The N50 is computed per dataset from the "aligned_lengths" column when
    df has one, otherwise from the "lengths" column.

    df        -- DataFrame with a "dataset" column plus "lengths" and/or
                 "aligned_lengths"
    figformat -- file extension appended to the output path and passed to
                 Plot.save(format=...)
    path      -- prefix to which "NanoComp_N50.<figformat>" is appended
    title     -- optional plot title; defaults to the Plot's own title
    palette   -- forwarded unchanged to sns.barplot

    Returns a one-element list holding the saved Plot object.
    '''
    n50_bar = Plot(path=path + "NanoComp_N50." + figformat,
                   title="Comparing read length N50")
    # Compute the dataset names once; they drive both the bar order and the
    # per-dataset row selection.
    datasets = df["dataset"].unique()
    if "aligned_lengths" in df:
        column = "aligned_lengths"
        # The bars are N50 values, not a yield, so label them accordingly.
        ylabel = 'Aligned read length N50'
    else:
        column = "lengths"
        ylabel = 'Sequenced read length N50'
    n50s = [get_N50(np.sort(df.loc[df["dataset"] == d, column]))
            for d in datasets]
    ax = sns.barplot(x=list(datasets),
                     y=n50s,
                     palette=palette,
                     order=datasets)
    ax.set(ylabel=ylabel, title=title or n50_bar.title)
    plt.xticks(rotation=30, ha='center')
    n50_bar.fig = ax.get_figure()
    n50_bar.save(format=figformat)
    # Release the figure(s) once written so they do not accumulate.
    plt.close("all")
    return [n50_bar]
def n50_barplot(df, path, settings, title=None):
    '''
    Create an html bar chart comparing the read length N50 of every dataset.

    The N50 is computed per dataset from the "aligned_lengths" column when
    df has one, otherwise from the "lengths" column. One bar trace is added
    per dataset, named after the dataset.

    df       -- DataFrame with a "dataset" column plus "lengths" and/or
                "aligned_lengths"
    path     -- prefix to which "NanoComp_N50.html" is appended
    settings -- passed unchanged to Plot.save
    title    -- optional plot title; defaults to the Plot's own title

    Returns a one-element list holding the saved Plot object.
    '''
    n50_bar = Plot(path=path + "NanoComp_N50.html",
                   title="Comparing read length N50")
    # Compute the dataset names once; they are reused for row selection and
    # for naming the traces.
    datasets = df["dataset"].unique()
    if "aligned_lengths" in df:
        column = "aligned_lengths"
        # The bars are N50 values, not a yield, so label them accordingly.
        ylabel = 'Aligned read length N50'
    else:
        column = "lengths"
        ylabel = 'Sequenced read length N50'
    n50s = [get_N50(np.sort(df.loc[df["dataset"] == d, column]))
            for d in datasets]

    n50_bar.fig = go.Figure()
    # Use a distinct loop variable instead of rebinding the iterated sequence.
    for dataset, n50 in zip(datasets, n50s):
        n50_bar.fig.add_trace(go.Bar(x=[dataset], y=[n50], name=dataset))

    n50_bar.fig.update_layout(
        title=title or n50_bar.title,
        title_x=0.5,
        yaxis_title=ylabel,
    )

    n50_bar.html = n50_bar.fig.to_html(full_html=False, include_plotlyjs='cdn')
    n50_bar.save(settings)
    return [n50_bar]
def make_plots(datadf, settings):
    '''
    Produce every nanoplotter plot that the columns of datadf allow.

    settings["lengths_pointer"] names the DataFrame column holding the
    lengths to plot; when settings["logBool"] is set, the length-based
    plots are additionally drawn with log=True.
    Plots are only created when the columns they need are present in datadf.
    Returns the list of plots collected from the nanoplotter calls.
    '''
    plot_settings = {"font_scale": settings["font_scale"]}
    nanoplotter.plot_settings(plot_settings, dpi=settings["dpi"])
    color = nanoplotter.check_valid_color(settings["color"])
    colormap = nanoplotter.check_valid_colormap(settings["colormap"])
    plotdict = {
        kind: settings["plots"].count(kind)
        for kind in ("kde", "hex", "dot", 'pauvre')
    }
    plots = []

    def kept(column):
        # Column restricted to the rows flagged in "length_filter".
        return datadf[datadf["length_filter"]][column]

    def add_scatter(x, y, names, stem, **extra):
        # All scatter plots share the same look-and-feel arguments.
        plots.extend(
            nanoplotter.scatter(
                x=x,
                y=y,
                names=names,
                path=settings["path"] + stem,
                color=color,
                figformat=settings["format"],
                plots=plotdict,
                title=settings["title"],
                plot_settings=plot_settings,
                **extra))

    n50 = nanomath.get_N50(np.sort(datadf["lengths"])) if settings["N50"] else None
    length_figures = nanoplotter.length_plots(
        array=kept("lengths").astype('uint64'),
        name="Read length",
        path=settings["path"],
        n50=n50,
        color=color,
        figformat=settings["format"],
        title=settings["title"])
    plots.extend(length_figures)
    logging.info("Created length plots")

    if "quals" in datadf:
        add_scatter(
            x=kept(settings["lengths_pointer"].replace('log_', '')),
            y=kept("quals"),
            names=['Read lengths', 'Average read quality'],
            stem="LengthvsQualityScatterPlot")
        if settings["logBool"]:
            add_scatter(
                x=kept(settings["lengths_pointer"]),
                y=kept("quals"),
                names=['Read lengths', 'Average read quality'],
                stem="LengthvsQualityScatterPlot",
                log=True)
        logging.info("Created LengthvsQual plot")

    if "channelIDs" in datadf:
        heatmaps = nanoplotter.spatial_heatmap(
            array=datadf["channelIDs"],
            title=settings["title"],
            path=settings["path"] + "ActivityMap_ReadsPerChannel",
            color=colormap,
            figformat=settings["format"])
        plots.extend(heatmaps)
        logging.info("Created spatialheatmap for succesfull basecalls.")

    if "start_time" in datadf:
        plots.extend(
            nanoplotter.time_plots(
                df=datadf,
                path=settings["path"],
                color=color,
                figformat=settings["format"],
                title=settings["title"],
                plot_settings=plot_settings))
        if settings["logBool"]:
            plots.extend(
                nanoplotter.time_plots(
                    df=datadf,
                    path=settings["path"],
                    color=color,
                    figformat=settings["format"],
                    title=settings["title"],
                    log_length=True,
                    plot_settings=plot_settings))
        logging.info("Created timeplots.")

    if "aligned_lengths" in datadf and "lengths" in datadf:
        add_scatter(
            x=kept("aligned_lengths"),
            y=kept("lengths"),
            names=["Aligned read lengths", "Sequenced read length"],
            stem="AlignedReadlengthvsSequencedReadLength")
        logging.info("Created AlignedLength vs Length plot.")

    if "mapQ" in datadf and "quals" in datadf:
        # This comparison uses all rows, not only the length-filtered ones.
        add_scatter(
            x=datadf["mapQ"],
            y=datadf["quals"],
            names=["Read mapping quality", "Average basecall quality"],
            stem="MappingQualityvsAverageBaseQuality")
        logging.info("Created MapQvsBaseQ plot.")
        add_scatter(
            x=kept(settings["lengths_pointer"].replace('log_', '')),
            y=kept("mapQ"),
            names=["Read length", "Read mapping quality"],
            stem="MappingQualityvsReadLength")
        if settings["logBool"]:
            add_scatter(
                x=kept(settings["lengths_pointer"]),
                y=kept("mapQ"),
                names=["Read length", "Read mapping quality"],
                stem="MappingQualityvsReadLength",
                log=True)
        logging.info("Created Mapping quality vs read length plot.")

    if "percentIdentity" in datadf:
        minPID = np.percentile(datadf["percentIdentity"], 1)
        # The statistic annotation is dropped when hide_stats is set.
        stat = None if settings["hide_stats"] else stats.pearsonr
        if "aligned_quals" in datadf:
            add_scatter(
                x=datadf["percentIdentity"],
                y=datadf["aligned_quals"],
                names=["Percent identity", "Average Base Quality"],
                stem="PercentIdentityvsAverageBaseQuality",
                stat=stat,
                minvalx=minPID)
            logging.info("Created Percent ID vs Base quality plot.")
        add_scatter(
            x=kept(settings["lengths_pointer"].replace('log_', '')),
            y=kept("percentIdentity"),
            names=["Aligned read length", "Percent identity"],
            stem="PercentIdentityvsAlignedReadLength",
            stat=stat,
            minvaly=minPID)
        if settings["logBool"]:
            add_scatter(
                x=kept(settings["lengths_pointer"]),
                y=kept("percentIdentity"),
                names=["Aligned read length", "Percent identity"],
                stem="PercentIdentityvsAlignedReadLength",
                stat=stat,
                log=True,
                minvaly=minPID)
        histogram = nanoplotter.dynamic_histogram(
            array=datadf["percentIdentity"],
            name="percent identity",
            path=settings["path"] + "PercentIdentityHistogram",
            title=settings["title"],
            color=color)
        plots.append(histogram)
        logging.info("Created Percent ID vs Length plot")

    return plots
def make_plots(datadf, settings):
    '''
    Call plotting functions from nanoplotter

    settings["lengths_pointer"] is a column in the DataFrame specifying
    which lengths to use; settings["logBool"] is forwarded as the log
    argument of the length-based scatter plots.
    Each plot is only created when the columns it needs are present in
    datadf. Returns the list of plots collected from the nanoplotter calls.
    '''
    color = nanoplotter.check_valid_color(settings["color"])
    plotdict = {
        kind: settings["plots"].count(kind)
        for kind in ["kde", "hex", "dot", 'pauvre']
    }
    plots = []
    if settings["no_N50"]:
        n50 = None
    else:
        n50 = nanomath.get_N50(np.sort(datadf["lengths"]))
    plots.extend(
        nanoplotter.length_plots(array=datadf["lengths"],
                                 name="Read length",
                                 path=settings["path"],
                                 n50=n50,
                                 color=color,
                                 figformat=settings["format"],
                                 title=settings["title"]))
    logging.info("Created length plots")
    if "quals" in datadf:
        plots.extend(
            nanoplotter.scatter(x=datadf[settings["lengths_pointer"]],
                                y=datadf["quals"],
                                names=['Read lengths', 'Average read quality'],
                                path=settings["path"] + settings["length_prefix"]
                                + "LengthvsQualityScatterPlot",
                                color=color,
                                figformat=settings["format"],
                                plots=plotdict,
                                log=settings["logBool"],
                                title=settings["title"]))
        logging.info("Created LengthvsQual plot")
    if "channelIDs" in datadf:
        plots.extend(
            nanoplotter.spatial_heatmap(array=datadf["channelIDs"],
                                        title=settings["title"],
                                        path=settings["path"] + "ActivityMap_ReadsPerChannel",
                                        color="Greens",
                                        figformat=settings["format"]))
        logging.info("Created spatialheatmap for succesfull basecalls.")
    if "start_time" in datadf:
        plots.extend(
            nanoplotter.time_plots(df=datadf,
                                   path=settings["path"],
                                   color=color,
                                   figformat=settings["format"],
                                   title=settings["title"]))
        logging.info("Created timeplots.")
    if "aligned_lengths" in datadf and "lengths" in datadf:
        plots.extend(
            nanoplotter.scatter(
                x=datadf["aligned_lengths"],
                y=datadf["lengths"],
                names=["Aligned read lengths", "Sequenced read length"],
                path=settings["path"] + "AlignedReadlengthvsSequencedReadLength",
                figformat=settings["format"],
                plots=plotdict,
                color=color,
                title=settings["title"]))
        logging.info("Created AlignedLength vs Length plot.")
    # The column is called "mapQ"; the former "maqpQ" check never matched,
    # so the mapping quality plots were silently skipped.
    if "mapQ" in datadf:
        # The basecall quality comparison additionally needs "quals".
        if "quals" in datadf:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf["mapQ"],
                    y=datadf["quals"],
                    names=["Read mapping quality", "Average basecall quality"],
                    path=settings["path"] + "MappingQualityvsAverageBaseQuality",
                    color=color,
                    figformat=settings["format"],
                    plots=plotdict,
                    title=settings["title"]))
            logging.info("Created MapQvsBaseQ plot.")
        plots.extend(
            nanoplotter.scatter(x=datadf[settings["lengths_pointer"]],
                                y=datadf["mapQ"],
                                names=["Read length", "Read mapping quality"],
                                path=settings["path"] + settings["length_prefix"]
                                + "MappingQualityvsReadLength",
                                color=color,
                                figformat=settings["format"],
                                plots=plotdict,
                                log=settings["logBool"],
                                title=settings["title"]))
        logging.info("Created Mapping quality vs read length plot.")
    if "percentIdentity" in datadf:
        minPID = np.percentile(datadf["percentIdentity"], 1)
        # Skip the quality comparison when "aligned_quals" is absent rather
        # than failing on the missing column.
        if "aligned_quals" in datadf:
            plots.extend(
                nanoplotter.scatter(x=datadf["percentIdentity"],
                                    y=datadf["aligned_quals"],
                                    names=["Percent identity", "Read quality"],
                                    path=settings["path"]
                                    + "PercentIdentityvsAverageBaseQuality",
                                    color=color,
                                    figformat=settings["format"],
                                    plots=plotdict,
                                    stat=stats.pearsonr,
                                    minvalx=minPID,
                                    title=settings["title"]))
            logging.info("Created Percent ID vs Base quality plot.")
        plots.extend(
            nanoplotter.scatter(
                x=datadf[settings["lengths_pointer"]],
                y=datadf["percentIdentity"],
                names=["Aligned read length", "Percent identity"],
                path=settings["path"] + "PercentIdentityvsAlignedReadLength",
                color=color,
                figformat=settings["format"],
                plots=plotdict,
                stat=stats.pearsonr,
                log=settings["logBool"],
                minvaly=minPID,
                title=settings["title"]))
        logging.info("Created Percent ID vs Length plot")
    return plots