Exemple #1
0
def n50_barplot(df, figformat, path, title=None, palette=None):
    '''
    Draw a bar chart with one bar per dataset showing its read length N50.

    The N50 is computed by get_N50 on the sorted "aligned_lengths" column
    when that column is present in df, and on "lengths" otherwise.

    df: DataFrame with a "dataset" column and a lengths column (see above)
    figformat: file extension, also passed as the format when saving
    path: prefix prepended to the output file name
    title: optional figure title; falls back to the Plot's default title
    palette: passed through to seaborn.barplot

    Returns a list holding the single Plot object that was saved.
    '''
    n50_bar = Plot(path=path + "NanoComp_N50." + figformat,
                   title="Comparing read length N50")
    # Computed once; the same order is used for the bars and for the N50s.
    datasets = df["dataset"].unique()
    if "aligned_lengths" in df:
        length_column = "aligned_lengths"
        # The bars show an N50, not a throughput, so label them as such.
        ylabel = 'Aligned read length N50'
    else:
        length_column = "lengths"
        ylabel = 'Sequenced read length N50'
    n50s = [
        get_N50(np.sort(df.loc[df["dataset"] == name, length_column]))
        for name in datasets
    ]
    ax = sns.barplot(x=list(datasets),
                     y=n50s,
                     palette=palette,
                     order=datasets)
    ax.set(ylabel=ylabel, title=title or n50_bar.title)
    plt.xticks(rotation=30, ha='center')
    n50_bar.fig = ax.get_figure()
    n50_bar.save(format=figformat)
    # Close every open figure so state does not leak into the next plot.
    plt.close("all")
    return [n50_bar]
Exemple #2
0
def n50_barplot(df, path, settings, title=None):
    '''
    Build a plotly bar chart with one bar per dataset showing its read
    length N50, store it as an html snippet and save it.

    The N50 is computed by get_N50 on the sorted "aligned_lengths" column
    when that column is present in df, and on "lengths" otherwise.

    df: DataFrame with a "dataset" column and a lengths column (see above)
    path: prefix prepended to the output file name "NanoComp_N50.html"
    settings: passed unchanged to Plot.save
    title: optional figure title; falls back to the Plot's default title

    Returns a list holding the single Plot object that was saved.
    '''
    n50_bar = Plot(path=path + "NanoComp_N50.html",
                   title="Comparing read length N50")
    # Computed once; the same order is used for the bars and for the N50s.
    datasets = df["dataset"].unique()
    if "aligned_lengths" in df:
        length_column = "aligned_lengths"
        # The bars show an N50, not a throughput, so label them as such.
        ylabel = 'Aligned read length N50'
    else:
        length_column = "lengths"
        ylabel = 'Sequenced read length N50'
    n50s = [
        get_N50(np.sort(df.loc[df["dataset"] == name, length_column]))
        for name in datasets
    ]

    n50_bar.fig = go.Figure()

    # One trace per dataset so each bar gets its own legend entry.
    for dataset, n50 in zip(datasets, n50s):
        n50_bar.fig.add_trace(go.Bar(x=[dataset], y=[n50], name=dataset))

    n50_bar.fig.update_layout(
        title=title or n50_bar.title,
        title_x=0.5,
        yaxis_title=ylabel,
    )

    n50_bar.html = n50_bar.fig.to_html(full_html=False, include_plotlyjs='cdn')
    n50_bar.save(settings)
    return [n50_bar]
Exemple #3
0
def _length_scatter_plots(filtered, settings, y_column, names, filename,
                          **scatter_kwargs):
    '''
    Scatter y_column against read length and, when settings["logBool"] is
    set, add the same plot again with log=True.

    The first plot uses the column named by settings["lengths_pointer"] with
    any 'log_' removed; the log variant uses settings["lengths_pointer"] as is.
    Both are written to settings["path"] + filename.

    Returns the list of plots produced by nanoplotter.scatter.
    '''
    lengths_pointer = settings["lengths_pointer"]
    target = settings["path"] + filename
    plots = list(
        nanoplotter.scatter(x=filtered[lengths_pointer.replace('log_', '')],
                            y=filtered[y_column],
                            names=list(names),
                            path=target,
                            **scatter_kwargs))
    if settings["logBool"]:
        plots.extend(
            nanoplotter.scatter(x=filtered[lengths_pointer],
                                y=filtered[y_column],
                                names=list(names),
                                path=target,
                                log=True,
                                **scatter_kwargs))
    return plots


def make_plots(datadf, settings):
    '''
    Call plotting functions from nanoplotter
    settings["lengths_pointer"] is a column in the DataFrame specifying which lengths to use

    Which plots are made depends on the columns present in datadf ("quals",
    "channelIDs", "start_time", "aligned_lengths", "mapQ", "percentIdentity",
    "aligned_quals"). Rows are restricted to those where the boolean column
    "length_filter" is true for every plot that involves read lengths.

    Keys read from settings: "font_scale", "dpi", "color", "colormap",
    "plots", "N50", "path", "format", "title", "lengths_pointer", "logBool"
    and "hide_stats".

    Returns the list of all plots created.
    '''
    plot_settings = dict(font_scale=settings["font_scale"])
    nanoplotter.plot_settings(plot_settings, dpi=settings["dpi"])
    color = nanoplotter.check_valid_color(settings["color"])
    colormap = nanoplotter.check_valid_colormap(settings["colormap"])
    plotdict = {
        kind: settings["plots"].count(kind)
        for kind in ["kde", "hex", "dot", 'pauvre']
    }
    # The N50 is taken over all reads, not only those passing the filter.
    n50 = nanomath.get_N50(np.sort(datadf["lengths"])) if settings["N50"] else None

    # Apply the length filter once instead of re-masking for every plot.
    filtered = datadf[datadf["length_filter"]]
    # Keyword arguments shared by every nanoplotter.scatter call below.
    common = dict(color=color,
                  figformat=settings["format"],
                  plots=plotdict,
                  title=settings["title"],
                  plot_settings=plot_settings)

    plots = []
    plots.extend(
        nanoplotter.length_plots(array=filtered["lengths"].astype('uint64'),
                                 name="Read length",
                                 path=settings["path"],
                                 n50=n50,
                                 color=color,
                                 figformat=settings["format"],
                                 title=settings["title"]))
    logging.info("Created length plots")

    if "quals" in datadf:
        plots.extend(
            _length_scatter_plots(filtered, settings, "quals",
                                  ['Read lengths', 'Average read quality'],
                                  "LengthvsQualityScatterPlot",
                                  **common))
        logging.info("Created LengthvsQual plot")

    if "channelIDs" in datadf:
        plots.extend(
            nanoplotter.spatial_heatmap(array=datadf["channelIDs"],
                                        title=settings["title"],
                                        path=settings["path"] +
                                        "ActivityMap_ReadsPerChannel",
                                        color=colormap,
                                        figformat=settings["format"]))
        logging.info("Created spatialheatmap for succesfull basecalls.")

    if "start_time" in datadf:
        time_kwargs = dict(df=datadf,
                           path=settings["path"],
                           color=color,
                           figformat=settings["format"],
                           title=settings["title"],
                           plot_settings=plot_settings)
        plots.extend(nanoplotter.time_plots(**time_kwargs))
        if settings["logBool"]:
            plots.extend(nanoplotter.time_plots(log_length=True, **time_kwargs))
        logging.info("Created timeplots.")

    if "aligned_lengths" in datadf and "lengths" in datadf:
        plots.extend(
            nanoplotter.scatter(
                x=filtered["aligned_lengths"],
                y=filtered["lengths"],
                names=["Aligned read lengths", "Sequenced read length"],
                path=settings["path"] +
                "AlignedReadlengthvsSequencedReadLength",
                **common))
        logging.info("Created AlignedLength vs Length plot.")

    if "mapQ" in datadf and "quals" in datadf:
        # No lengths involved here, so the unfiltered frame is used.
        plots.extend(
            nanoplotter.scatter(
                x=datadf["mapQ"],
                y=datadf["quals"],
                names=["Read mapping quality", "Average basecall quality"],
                path=settings["path"] + "MappingQualityvsAverageBaseQuality",
                **common))
        logging.info("Created MapQvsBaseQ plot.")
        plots.extend(
            _length_scatter_plots(filtered, settings, "mapQ",
                                  ["Read length", "Read mapping quality"],
                                  "MappingQualityvsReadLength",
                                  **common))
        logging.info("Created Mapping quality vs read length plot.")

    if "percentIdentity" in datadf:
        # The 1st percentile is passed as the lower bound of the identity axis.
        minPID = np.percentile(datadf["percentIdentity"], 1)
        stat = None if settings["hide_stats"] else stats.pearsonr
        if "aligned_quals" in datadf:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf["percentIdentity"],
                    y=datadf["aligned_quals"],
                    names=["Percent identity", "Average Base Quality"],
                    path=settings["path"] +
                    "PercentIdentityvsAverageBaseQuality",
                    stat=stat,
                    minvalx=minPID,
                    **common))
            logging.info("Created Percent ID vs Base quality plot.")
        plots.extend(
            _length_scatter_plots(filtered, settings, "percentIdentity",
                                  ["Aligned read length", "Percent identity"],
                                  "PercentIdentityvsAlignedReadLength",
                                  stat=stat,
                                  minvaly=minPID,
                                  **common))

        # dynamic_histogram yields a single plot, hence append not extend.
        plots.append(
            nanoplotter.dynamic_histogram(array=datadf["percentIdentity"],
                                          name="percent identity",
                                          path=settings["path"] +
                                          "PercentIdentityHistogram",
                                          title=settings["title"],
                                          color=color))
        logging.info("Created Percent ID vs Length plot")
    return plots
Exemple #4
0
def make_plots(datadf, settings):
    '''
    Call plotting functions from nanoplotter
    settings["lengths_pointer"] is a column in the DataFrame specifying which lengths to use

    Which plots are made depends on the columns present in datadf ("quals",
    "channelIDs", "start_time", "aligned_lengths", "mapQ", "percentIdentity",
    "aligned_quals").

    Keys read from settings: "color", "plots", "no_N50", "path", "format",
    "title", "lengths_pointer", "length_prefix" and "logBool".

    Returns the list of all plots created.
    '''
    color = nanoplotter.check_valid_color(settings["color"])
    plotdict = {
        kind: settings["plots"].count(kind)
        for kind in ["kde", "hex", "dot", 'pauvre']
    }
    plots = []
    if settings["no_N50"]:
        n50 = None
    else:
        n50 = nanomath.get_N50(np.sort(datadf["lengths"]))
    plots.extend(
        nanoplotter.length_plots(array=datadf["lengths"],
                                 name="Read length",
                                 path=settings["path"],
                                 n50=n50,
                                 color=color,
                                 figformat=settings["format"],
                                 title=settings["title"]))
    logging.info("Created length plots")
    if "quals" in datadf:
        plots.extend(
            nanoplotter.scatter(x=datadf[settings["lengths_pointer"]],
                                y=datadf["quals"],
                                names=['Read lengths', 'Average read quality'],
                                path=settings["path"] +
                                settings["length_prefix"] +
                                "LengthvsQualityScatterPlot",
                                color=color,
                                figformat=settings["format"],
                                plots=plotdict,
                                log=settings["logBool"],
                                title=settings["title"]))
        logging.info("Created LengthvsQual plot")
    if "channelIDs" in datadf:
        plots.extend(
            nanoplotter.spatial_heatmap(array=datadf["channelIDs"],
                                        title=settings["title"],
                                        path=settings["path"] +
                                        "ActivityMap_ReadsPerChannel",
                                        color="Greens",
                                        figformat=settings["format"]))
        logging.info("Created spatialheatmap for succesfull basecalls.")
    if "start_time" in datadf:
        plots.extend(
            nanoplotter.time_plots(df=datadf,
                                   path=settings["path"],
                                   color=color,
                                   figformat=settings["format"],
                                   title=settings["title"]))
        logging.info("Created timeplots.")
    if "aligned_lengths" in datadf and "lengths" in datadf:
        plots.extend(
            nanoplotter.scatter(
                x=datadf["aligned_lengths"],
                y=datadf["lengths"],
                names=["Aligned read lengths", "Sequenced read length"],
                path=settings["path"] +
                "AlignedReadlengthvsSequencedReadLength",
                figformat=settings["format"],
                plots=plotdict,
                color=color,
                title=settings["title"]))
        logging.info("Created AlignedLength vs Length plot.")
    # The column is "mapQ"; the misspelled key previously tested here never
    # matched, so these plots were silently skipped.
    if "mapQ" in datadf:
        # Only this first plot needs basecall qualities.
        if "quals" in datadf:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf["mapQ"],
                    y=datadf["quals"],
                    names=["Read mapping quality", "Average basecall quality"],
                    path=settings["path"] +
                    "MappingQualityvsAverageBaseQuality",
                    color=color,
                    figformat=settings["format"],
                    plots=plotdict,
                    title=settings["title"]))
            logging.info("Created MapQvsBaseQ plot.")
        plots.extend(
            nanoplotter.scatter(x=datadf[settings["lengths_pointer"]],
                                y=datadf["mapQ"],
                                names=["Read length", "Read mapping quality"],
                                path=settings["path"] +
                                settings["length_prefix"] +
                                "MappingQualityvsReadLength",
                                color=color,
                                figformat=settings["format"],
                                plots=plotdict,
                                log=settings["logBool"],
                                title=settings["title"]))
        logging.info("Created Mapping quality vs read length plot.")
    if "percentIdentity" in datadf:
        # The 1st percentile is passed as the lower bound of the identity axis.
        minPID = np.percentile(datadf["percentIdentity"], 1)
        # Skip the quality plot rather than fail when the column is absent.
        if "aligned_quals" in datadf:
            plots.extend(
                nanoplotter.scatter(x=datadf["percentIdentity"],
                                    y=datadf["aligned_quals"],
                                    names=["Percent identity", "Read quality"],
                                    path=settings["path"] +
                                    "PercentIdentityvsAverageBaseQuality",
                                    color=color,
                                    figformat=settings["format"],
                                    plots=plotdict,
                                    stat=stats.pearsonr,
                                    minvalx=minPID,
                                    title=settings["title"]))
            logging.info("Created Percent ID vs Base quality plot.")
        plots.extend(
            nanoplotter.scatter(
                x=datadf[settings["lengths_pointer"]],
                y=datadf["percentIdentity"],
                names=["Aligned read length", "Percent identity"],
                path=settings["path"] + "PercentIdentityvsAlignedReadLength",
                color=color,
                figformat=settings["format"],
                plots=plotdict,
                stat=stats.pearsonr,
                log=settings["logBool"],
                minvaly=minPID,
                title=settings["title"]))
        logging.info("Created Percent ID vs Length plot")
    return plots