コード例 #1
0
def n50_barplot(df, figformat, path, title=None, palette=None):
    """Create a bar plot comparing the read length N50 of every dataset.

    The N50 is computed from the "aligned_lengths" column when df has one,
    otherwise from the "lengths" column.

    Args:
        df: DataFrame with a "dataset" column and a "lengths" (optionally also
            an "aligned_lengths") column.
        figformat: Extension appended to the output path; also handed to
            Plot.save as the format.
        path: Prefix of the output file name.
        title: Title to show; falls back to the Plot's default title if falsy.
        palette: Palette handed to seaborn for the bars.

    Returns:
        A one-element list holding the Plot.
    """
    n50_bar = Plot(path=path + "NanoComp_N50." + figformat,
                   title="Comparing read length N50")
    if "aligned_lengths" in df:
        length_column = "aligned_lengths"
        # This axis shows an N50, not a throughput, so label it accordingly.
        ylabel = 'Aligned read length N50'
    else:
        length_column = "lengths"
        ylabel = 'Sequenced read length N50'

    # Compute the dataset order once and reuse it for values, x and order.
    datasets = df["dataset"].unique()
    n50s = [
        get_N50(np.sort(df.loc[df["dataset"] == d, length_column]))
        for d in datasets
    ]
    ax = sns.barplot(x=list(datasets),
                     y=n50s,
                     palette=palette,
                     order=datasets)
    ax.set(ylabel=ylabel, title=title or n50_bar.title)
    plt.xticks(rotation=30, ha='center')
    n50_bar.fig = ax.get_figure()
    n50_bar.save(format=figformat)
    plt.close("all")
    return [n50_bar]
コード例 #2
0
ファイル: timeplots.py プロジェクト: wdecoster/NanoPlot
def sequencing_speed_over_time(dfs, path, title, settings, color="#4CB391"):
    """Build and save a violin plot of the sequencing speed per time bin.

    Speed is "lengths" divided by "duration"; rows whose duration equals 0
    are left out of the plot.

    Args:
        dfs: DataFrame with "duration", "lengths" and "timebin" columns.
        path: Prefix of the output file name.
        title: Title to show; falls back to the Plot's default title if falsy.
        settings: Passed unchanged to Plot.save.
        color: Fill colour of the violins.

    Returns:
        The Plot carrying the figure and its HTML fragment.
    """
    time_duration = Plot(path=path + "TimeSequencingSpeed_ViolinPlot.html",
                         title="Violin plot of sequencing speed over time")

    # Rows with a zero duration would lead to a division by zero below.
    has_duration = dfs['duration'] != 0
    timebins = dfs.loc[has_duration, "timebin"]
    speed = dfs.loc[has_duration, "lengths"] / dfs.loc[has_duration, "duration"]

    violin = go.Violin(x=timebins,
                       y=speed,
                       points=False,
                       spanmode="hard",
                       line_color='black',
                       line_width=1.5,
                       fillcolor=color,
                       opacity=0.8)

    fig = go.Figure()
    fig.add_trace(violin)
    fig.update_layout(xaxis_title='Interval (hours)',
                      yaxis_title='Sequencing speed (nucleotides/second)',
                      title=title or time_duration.title,
                      title_x=0.5)
    fig.update_xaxes(tickangle=45)

    time_duration.fig = fig
    time_duration.html = time_duration.fig.to_html(full_html=False,
                                                   include_plotlyjs='cdn')
    time_duration.save(settings)
    return time_duration
コード例 #3
0
ファイル: timeplots.py プロジェクト: pphector/NanoPlot
def cumulative_yield(dfs, path, figformat, title, color):
    """Create cumulative yield plots in gigabases and in number of reads.

    Args:
        dfs: DataFrame with a "lengths" column whose index can be resampled
            and supports total_seconds() (e.g. a TimedeltaIndex).
        path: Prefix of the output file names.
        figformat: Extension appended to the output paths; also handed to
            Plot.save as the format.
        title: Title to show; falls back to each Plot's default title if falsy.
        color: Colour of the scatter points.

    Returns:
        A list [gigabase plot, number-of-reads plot].
    """

    def _scatter_and_save(plot, series, ylabel):
        """Scatter series against run time in hours and save it into plot."""
        ax = sns.regplot(x=series.index.total_seconds() / 3600,
                         y=series,
                         x_ci=None,
                         fit_reg=False,
                         color=color,
                         scatter_kws={"s": 3})
        ax.set(xlabel='Run time (hours)',
               ylabel=ylabel,
               title=title or plot.title)
        plot.fig = ax.get_figure()
        plot.save(format=figformat)
        plt.close("all")
        return plot

    cum_yield_gb = Plot(path=path + "CumulativeYieldPlot_Gigabases." +
                        figformat,
                        title="Cumulative yield")
    # 'min' replaces the 'T' minute alias, which pandas 2.2 deprecated.
    gigabases = dfs.loc[:, "lengths"].cumsum().resample('1min').max() / 1e9
    _scatter_and_save(cum_yield_gb, gigabases, 'Cumulative yield in gigabase')

    cum_yield_reads = Plot(path=path + "CumulativeYieldPlot_NumberOfReads." +
                           figformat,
                           title="Cumulative yield")
    reads = dfs.loc[:, "lengths"].resample('10min').count().cumsum()
    _scatter_and_save(cum_yield_reads, reads,
                      'Cumulative yield in number of reads')

    return [cum_yield_gb, cum_yield_reads]
コード例 #4
0
ファイル: timeplots.py プロジェクト: iliasbukraa/nanoplotter
def length_over_time(dfs, path, figformat, title, log_length=False, plot_settings=None):
    """Create a seaborn violin plot of read lengths per time bin.

    Args:
        dfs: DataFrame with "timebin" and "lengths" columns ("log_lengths" is
            needed when log_length is true). If a "length_filter" column is
            present, only rows where it is true are plotted.
        path: Prefix of the output file name.
        figformat: Extension appended to the output path; also handed to
            Plot.save as the format.
        title: Title to show; falls back to the Plot's default title if falsy.
        log_length: Plot the "log_lengths" column and label the y ticks with
            the corresponding powers of ten.
        plot_settings: Optional mapping of extra keyword arguments for
            sns.set; None means no extra settings.

    Returns:
        The Plot carrying the figure.
    """
    time_length = Plot(path=path + "TimeLengthViolinPlot." + figformat,
                       title="Violin plot of read lengths over time")
    # None default instead of a shared mutable {} default argument.
    sns.set(style="white", **(plot_settings or {}))
    length_column = "log_lengths" if log_length else "lengths"

    if "length_filter" in dfs:  # produced by NanoPlot filtering of too long reads
        temp_dfs = dfs[dfs["length_filter"]]
    else:
        temp_dfs = dfs

    ax = sns.violinplot(x="timebin",
                        y=length_column,
                        data=temp_dfs,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Read length",
           title=title or time_length.title)
    if log_length:
        # Powers of ten up to ten times the longest read, shown at their log10 position.
        upper = 10 * np.amax(dfs["lengths"])
        ticks = [10**i for i in range(10) if 10**i <= upper]
        ax.set(yticks=np.log10(ticks),
               yticklabels=ticks)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_length.fig = ax.get_figure()
    time_length.save(format=figformat)
    plt.close("all")
    return time_length
コード例 #5
0
def overlay_histogram_phred(df, path, settings, palette=None):
    """Create an overlay histogram of phred-scaled percent identity.

    A "phredIdentity" column is added to df (in place), computed as
    -10 * log10(1 - percentIdentity / 100). Reads with a perfect alignment
    (percentIdentity of 100) would get a score of Inf; those are set to 60,
    a very high phred score.

    Args:
        df: DataFrame with a "percentIdentity" column; it is modified in place.
        path: Prefix of the output file name.
        settings: Passed unchanged to Plot.save.
        palette: Colours for the datasets; defaults to the plotly default
            colours repeated five times.

    Returns:
        The Plot carrying the figure and its HTML.
    """
    df["phredIdentity"] = -10 * np.log10(1 - (df["percentIdentity"] / 100))
    # A single .loc assignment: the chained form df[col][mask] = value writes
    # to a temporary object under pandas Copy-on-Write and leaves df unchanged.
    df.loc[np.isinf(df["phredIdentity"]), "phredIdentity"] = 60

    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    hist_phred = Plot(path=path + "NanoComp_OverlayHistogram_PhredScore.html",
                      title="Histogram of Phred scores")

    hist_phred.html, hist_phred.fig = plot_overlay_histogram(df,
                                                             palette,
                                                             "phredIdentity",
                                                             hist_phred.title,
                                                             bins=20,
                                                             density=True)

    hist_phred.save(settings)

    return hist_phred
コード例 #6
0
def spatial_heatmap(array, path, colormap, figformat, title=None):
    """Create a plotly heatmap of the number of reads per channel.

    Args:
        array: Channel identifier of every read; must expose .size.
        path: Output path without extension (".html" is appended).
        colormap: Colorscale handed to go.Heatmap.
        figformat: Passed unchanged to Plot.save.
        title: Title to show; falls back to the Plot's default title if falsy.

    Returns:
        A one-element list holding the Plot.
    """
    logging.info(
        "Nanoplotter: Creating heatmap of reads per channel using %s reads.",
        array.size)

    activity_map = Plot(path=path + ".html",
                        title="Number of reads generated per channel")

    layout = make_layout(maxval=np.amax(array))
    # Series.value_counts replaces the deprecated top-level pd.value_counts.
    reads_per_channel = pd.Series(array).value_counts()

    # Write each channel's read count at that channel's position in the layout.
    for channel, count in reads_per_channel.items():
        layout.template[np.where(layout.structure == channel)] = count

    data = pd.DataFrame(layout.template,
                        index=layout.yticks,
                        columns=layout.xticks)

    fig = go.Figure(
        data=go.Heatmap(z=data.values.tolist(), colorscale=colormap))
    fig.update_layout(xaxis_title='Channel',
                      yaxis_title='Number of reads',
                      title=title or activity_map.title,
                      title_x=0.5)

    activity_map.fig = fig
    activity_map.html = activity_map.fig.to_html(full_html=False,
                                                 include_plotlyjs='cdn')
    activity_map.save(figformat)
    return [activity_map]
コード例 #7
0
def output_barplot(df, figformat, path, title=None, palette=None):
    """Draw two bar plots per dataset: read count and total throughput.

    Throughput is summed from "aligned_lengths" when df has that column,
    otherwise from "lengths", and is shown in gigabases.

    Args:
        df: DataFrame with a "dataset" column and a "lengths" (optionally also
            an "aligned_lengths") column.
        figformat: Extension appended to the output paths; also handed to
            Plot.save as the format.
        path: Prefix of the output file names.
        title: Title to show; falls back to each Plot's default title if falsy.
        palette: Palette handed to seaborn.

    Returns:
        A tuple (read count Plot, throughput Plot).
    """
    logging.info(
        "NanoComp: Creating barplots for number of reads and total throughput."
    )

    # Number of reads per dataset.
    read_count = Plot(path=path + "NanoComp_number_of_reads." + figformat,
                      title="Comparing number of reads")
    count_ax = sns.countplot(x="dataset", data=df, palette=palette)
    count_ax.set(ylabel='Number of reads', title=title or read_count.title)
    plt.xticks(rotation=30, ha='center')
    read_count.fig = count_ax.get_figure()
    read_count.save(format=figformat)
    plt.close("all")

    # Total bases per dataset.
    throughput_bases = Plot(path=path + "NanoComp_total_throughput." +
                            figformat,
                            title="Comparing throughput in gigabases")
    aligned = "aligned_lengths" in df
    length_column = 'aligned_lengths' if aligned else 'lengths'
    ylabel = 'Total gigabase aligned' if aligned else 'Total gigabase sequenced'
    throughput = df.groupby('dataset')[length_column].sum()

    bases_ax = sns.barplot(x=list(throughput.index),
                           y=throughput / 1e9,
                           palette=palette,
                           order=df["dataset"].unique())
    bases_ax.set(ylabel=ylabel, title=title or throughput_bases.title)
    plt.xticks(rotation=30, ha='center')
    throughput_bases.fig = bases_ax.get_figure()
    throughput_bases.save(format=figformat)
    plt.close("all")

    return read_count, throughput_bases
コード例 #8
0
ファイル: timeplots.py プロジェクト: wdecoster/NanoPlot
def quality_over_time(dfs, path, settings, title=None, color="#4CB391"):
    """Build and save a violin plot of read quality per time bin.

    Args:
        dfs: DataFrame with "quals" and "timebin" columns.
        path: Prefix of the output file name.
        settings: Passed unchanged to Plot.save.
        title: Title to show; falls back to the Plot's default title if falsy.
        color: Fill colour of the violins.

    Returns:
        The Plot carrying the figure and its HTML fragment.
    """
    time_qual = Plot(path=path + "TimeQualityViolinPlot.html",
                     title="Violin plot of quality over time")

    violin = go.Violin(y=dfs["quals"],
                       x=dfs["timebin"],
                       points=False,
                       spanmode="hard",
                       line_color='black',
                       line_width=1.5,
                       fillcolor=color,
                       opacity=0.8)

    fig = go.Figure()
    fig.add_trace(violin)
    fig.update_layout(xaxis_title='Interval (hours)',
                      yaxis_title='Basecall quality',
                      title=title or time_qual.title,
                      title_x=0.5)
    fig.update_xaxes(tickangle=45)

    time_qual.fig = fig
    time_qual.html = time_qual.fig.to_html(full_html=False,
                                           include_plotlyjs='cdn')
    time_qual.save(settings)
    return time_qual
コード例 #9
0
def spatial_heatmap(array, path, title=None, color="Greens", figformat="png"):
    """Create a seaborn heatmap of the number of reads per channel.

    Args:
        array: Channel identifier of every read; must expose .size.
        path: Output path without extension ("." + figformat is appended).
        title: Title to show; falls back to the Plot's default title if falsy.
        color: Colormap handed to sns.heatmap.
        figformat: Extension of the output file; also handed to Plot.save as
            the format.

    Returns:
        A one-element list holding the Plot.
    """
    logging.info(
        "Nanoplotter: Creating heatmap of reads per channel using %s reads.",
        array.size)
    activity_map = Plot(path=path + "." + figformat,
                        title="Number of reads generated per channel")
    layout = make_layout(maxval=np.amax(array))
    # Series.value_counts replaces the deprecated top-level pd.value_counts.
    reads_per_channel = pd.Series(array).value_counts()
    # Write each channel's read count at that channel's position in the layout.
    for channel, count in reads_per_channel.items():
        layout.template[np.where(layout.structure == channel)] = count
    plt.figure()
    ax = sns.heatmap(data=pd.DataFrame(layout.template,
                                       index=layout.yticks,
                                       columns=layout.xticks),
                     xticklabels="auto",
                     yticklabels="auto",
                     square=True,
                     cbar_kws={"orientation": "horizontal"},
                     cmap=color,
                     linewidths=0.20)
    ax.set_title(title or activity_map.title)
    activity_map.fig = ax.get_figure()
    activity_map.save(format=figformat)
    plt.close("all")
    return [activity_map]
コード例 #10
0
ファイル: nanoplotter_main.py プロジェクト: pythseq/NanoPlot
def yield_by_minimal_length_plot(array,
                                 name,
                                 path,
                                 settings,
                                 title=None,
                                 color="#4CB391"):
    """Plot the cumulative yield obtained when keeping reads of at least a given length.

    Lengths are sorted in descending order and cumulatively summed (in
    gigabases); at most 10000 randomly chosen points are drawn.

    Args:
        array: Read lengths; anything np.sort and len accept.
        name: Unused; kept so existing callers keep working.
        path: Prefix of the output file name.
        settings: Passed unchanged to Plot.save.
        title: Title to show; falls back to the Plot's default title if falsy.
        color: Colour of the markers.

    Returns:
        The Plot carrying the figure and its HTML fragment.
    """
    df = pd.DataFrame(data={"lengths": np.sort(array)[::-1]})
    df["cumyield_gb"] = df["lengths"].cumsum() / 10**9

    # Sample row labels of df itself: df has a fresh 0..n-1 index, so labels
    # taken from the input's own index may not exist in df and would produce
    # NaN rows on lookup.
    idx = np.random.choice(df.index, min(10000, len(df)), replace=False)
    subsample = df.loc[idx]

    yield_by_length = Plot(path=path + "Yield_By_Length.html",
                           title="Yield by length")

    fig = px.scatter(subsample, x="lengths", y="cumyield_gb")
    fig.update_traces(marker=dict(color=color))
    fig.update_layout(xaxis_title='Read length',
                      yaxis_title='Cumulative yield for minimal length [Gb]',
                      title=title or yield_by_length.title,
                      title_x=0.5)

    yield_by_length.fig = fig
    yield_by_length.html = yield_by_length.fig.to_html(full_html=False,
                                                       include_plotlyjs='cdn')
    yield_by_length.save(settings)

    return yield_by_length
コード例 #11
0
def compare_cumulative_yields(df, path, palette=None, title=None):
    """Plot the cumulative yield over time of every dataset in one figure.

    Each dataset gets a trace of its cumulative "lengths" sum (in gigabases,
    per 10 minute bin) and an annotation with its final rounded yield.

    Args:
        df: DataFrame with "dataset", "lengths" and "start_time" columns.
        path: Prefix of the output file name.
        palette: Colours for the datasets; defaults to the plotly default
            colours repeated five times. Colours are reused cyclically if
            there are more datasets than colours.
        title: Title to show; falls back to the Plot's default title if falsy.

    Returns:
        A one-element list holding the Plot.
    """
    from itertools import cycle

    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5
    dfs = check_valid_time_and_sort(df, "start_time").set_index("start_time")

    logging.info(
        "NanoComp: Creating cumulative yield plots using {} reads.".format(
            len(dfs)))
    cum_yield_gb = Plot(path=path +
                        "NanoComp_CumulativeYieldPlot_Gigabases.html",
                        title="Cumulative yield")
    data = []
    annotations = []
    # cycle() keeps a palette shorter than the number of datasets from
    # silently truncating the zip and dropping datasets.
    for sample, color in zip(df["dataset"].unique(), cycle(palette)):
        # 'min' replaces the 'T' minute alias, which pandas 2.2 deprecated.
        cumsum = dfs.loc[dfs["dataset"] == sample,
                         "lengths"].cumsum().resample('10min').max() / 1e9
        # Explicit positional access; series[-1] on a label index is deprecated.
        final_yield = cumsum.iloc[-1]
        data.append(
            go.Scatter(x=cumsum.index.total_seconds() / 3600,
                       y=cumsum,
                       opacity=0.75,
                       name=sample,
                       marker=dict(color=color)))
        annotations.append(
            dict(xref='paper',
                 x=0.99,
                 y=final_yield,
                 xanchor='left',
                 yanchor='middle',
                 text='{}Gb'.format(round(final_yield)),
                 showarrow=False))

    # Build the figure description once and use it for both HTML and Figure.
    figure_spec = {
        "data":
        data,
        "layout":
        go.Layout(barmode='overlay',
                  title=title or cum_yield_gb.title,
                  xaxis=dict(title="Time (hours)"),
                  yaxis=dict(title="Yield (gigabase)"),
                  annotations=annotations)
    }
    cum_yield_gb.html = plotly.offline.plot(figure_spec,
                                            output_type="div",
                                            show_link=False)
    cum_yield_gb.fig = go.Figure(figure_spec)
    cum_yield_gb.save()
    return [cum_yield_gb]
コード例 #12
0
def output_barplot(df, path, settings, title=None):
    """Create barplots based on number of reads and total sum of nucleotides sequenced.

    Both plots show one bar per dataset, in sorted dataset order. Throughput
    is summed from "aligned_lengths" when df has that column, otherwise from
    "lengths".

    Args:
        df: DataFrame with a "dataset" column and a "lengths" (optionally also
            an "aligned_lengths") column.
        path: Prefix of the output file names.
        settings: Passed unchanged to Plot.save.
        title: Title to show; falls back to each Plot's default title if falsy.

    Returns:
        A tuple (read count Plot, throughput Plot).
    """
    logging.info(
        "NanoComp: Creating barplots for number of reads and total throughput."
    )
    read_count = Plot(path=path + "NanoComp_number_of_reads.html",
                      title="Comparing number of reads")

    read_count.fig = go.Figure()

    counts = df['dataset'].value_counts(sort=False).sort_index()
    for dataset, count in counts.items():
        read_count.fig.add_trace(go.Bar(x=[dataset], y=[count], name=dataset))

    read_count.fig.update_layout(
        title_text=title or read_count.title,
        title_x=0.5,
        yaxis_title="Number of reads",
    )

    read_count.html = read_count.fig.to_html(full_html=False,
                                             include_plotlyjs='cdn')
    read_count.save(settings)

    throughput_bases = Plot(path=path + "NanoComp_total_throughput.html",
                            title="Comparing throughput in bases")
    if "aligned_lengths" in df:
        throughput = df.groupby('dataset')['aligned_lengths'].sum()
        ylabel = 'Total bases aligned'
    else:
        throughput = df.groupby('dataset')['lengths'].sum()
        ylabel = 'Total bases sequenced'

    throughput_bases.fig = go.Figure()
    # Take label and value from the same Series: groupby sorts its keys, so
    # pairing the sums with df["dataset"].unique() (order of appearance) could
    # attach a sum to the wrong dataset name.
    for dataset, sum_dataset in throughput.items():
        throughput_bases.fig.add_trace(
            go.Bar(x=[dataset], y=[sum_dataset], name=dataset))

    throughput_bases.fig.update_layout(
        title=title or throughput_bases.title,
        title_x=0.5,
        yaxis_title=ylabel,
    )

    throughput_bases.html = throughput_bases.fig.to_html(
        full_html=False, include_plotlyjs='cdn')
    throughput_bases.save(settings)

    return read_count, throughput_bases
コード例 #13
0
def dynamic_histogram(array, name, path, title=None, color="#4CB391"):
    """Create a plotly histogram of at most 10000 sampled values of array.

    Args:
        array: Values to plot; must support len() and .sample(n).
        name: Name of the plotted quantity, used in the file name (spaces
            replaced by underscores) and in the default title.
        path: Prefix of the output file name.
        title: Title to show; a default derived from name is used if falsy.
        color: Colour handed to plotly_histogram.

    Returns:
        The Plot carrying the HTML and figure produced by plotly_histogram.
    """
    file_name = "Dynamic_Histogram_{}.html".format(name.replace(' ', '_'))
    plot_title = title or "Dynamic histogram of {}".format(name)
    dynhist = Plot(path=path + file_name, title=plot_title)

    sample_size = min(len(array), 10000)
    html, fig = plotly_histogram(array=array.sample(sample_size),
                                 color=color,
                                 title=dynhist.title)
    dynhist.html = html
    dynhist.fig = fig
    dynhist.save()
    return dynhist
コード例 #14
0
ファイル: timeplots.py プロジェクト: wdecoster/NanoPlot
def length_over_time(dfs,
                     path,
                     title,
                     settings,
                     log_length=False,
                     color="#4CB391"):
    """Build and save a plotly violin plot of read lengths per time bin.

    Args:
        dfs: DataFrame with "timebin" and "lengths" columns ("log_lengths" is
            needed when log_length is true). If a "length_filter" column is
            present, only rows where it is true are plotted.
        path: Prefix of the output file name.
        title: Title to show; falls back to the Plot's default title if falsy.
        settings: Passed unchanged to Plot.save.
        log_length: Plot the "log_lengths" column and label the y ticks with
            the corresponding powers of ten.
        color: Fill colour of the violins.

    Returns:
        The Plot carrying the figure and its HTML fragment.
    """
    if log_length:
        time_length = Plot(path=path + "TimeLogLengthViolinPlot.html",
                           title="Violin plot of log read lengths over time")
    else:
        time_length = Plot(path=path + "TimeLengthViolinPlot.html",
                           title="Violin plot of read lengths over time")

    length_column = "log_lengths" if log_length else "lengths"

    if "length_filter" in dfs:  # produced by NanoPlot filtering of too long reads
        temp_dfs = dfs[dfs["length_filter"]]
    else:
        temp_dfs = dfs

    fig = go.Figure()

    fig.add_trace(
        go.Violin(y=temp_dfs[length_column],
                  x=temp_dfs["timebin"],
                  points=False,
                  spanmode="hard",
                  line_color='black',
                  line_width=1.5,
                  fillcolor=color,
                  opacity=0.8))
    fig.update_layout(xaxis_title='Interval (hours)',
                      yaxis_title='Read length',
                      title=title or time_length.title,
                      title_x=0.5)

    if log_length:
        # Powers of ten up to ten times the longest read, shown at their log10 position.
        upper = 10 * np.amax(dfs["lengths"])
        ticks = [10**i for i in range(10) if 10**i <= upper]
        fig.update_layout(yaxis=dict(
            tickmode='array', tickvals=np.log10(ticks), ticktext=ticks))

    # Rotate the time-bin labels on the x axis (not the length ticks on y).
    fig.update_xaxes(tickangle=45)

    time_length.fig = fig
    time_length.html = time_length.fig.to_html(full_html=False,
                                               include_plotlyjs='cdn')
    time_length.save(settings)

    return time_length
コード例 #15
0
def overlay_histogram_identity(df, path, settings, palette=None):
    """Create an overlay histogram of the "percentIdentity" column.

    Args:
        df: DataFrame handed to plot_overlay_histogram.
        path: Prefix of the output file name.
        settings: Passed unchanged to Plot.save.
        palette: Colours for the datasets; defaults to the plotly default
            colours repeated five times.

    Returns:
        The Plot carrying the HTML and figure from plot_overlay_histogram.
    """
    colors = palette
    if colors is None:
        colors = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    hist_pid = Plot(path=path + "NanoComp_OverlayHistogram_Identity.html",
                    title="Histogram of percent reference identity")

    html, fig = plot_overlay_histogram(df,
                                       colors,
                                       "percentIdentity",
                                       hist_pid.title,
                                       density=True)
    hist_pid.html = html
    hist_pid.fig = fig
    hist_pid.save(settings)
    return hist_pid
コード例 #16
0
ファイル: compplots.py プロジェクト: RADnovogene/nanoplotter
def compare_cumulative_yields(df, path, palette=None, title=None):
    """Plot the cumulative yield over time of every dataset in one figure.

    Each dataset gets a trace of its cumulative "lengths" sum in gigabases,
    per 10 minute bin.

    Args:
        df: DataFrame with "dataset", "lengths" and "start_time" columns.
        path: Prefix of the output file name.
        palette: Colours for the datasets; defaults to the plotly default
            colours repeated five times. Colours are reused cyclically if
            there are more datasets than colours.
        title: Title to show; falls back to the Plot's default title if falsy.

    Returns:
        A one-element list holding the Plot.
    """
    from itertools import cycle

    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5
    dfs = check_valid_time_and_sort(df, "start_time").set_index("start_time")

    logging.info(
        "Nanoplotter: Creating cumulative yield plots using {} reads.".format(
            len(dfs)))
    cum_yield_gb = Plot(path=path +
                        "NanoComp_CumulativeYieldPlot_Gigabases.html",
                        title="Cumulative yield")
    data = []
    # cycle() keeps a palette shorter than the number of datasets from
    # silently truncating the zip and dropping datasets.
    for d, c in zip(df["dataset"].unique(), cycle(palette)):
        # 'min' replaces the 'T' minute alias, which pandas 2.2 deprecated.
        s = dfs.loc[dfs["dataset"] == d,
                    "lengths"].cumsum().resample('10min').max() / 1e9
        data.append(
            go.Scatter(x=s.index.total_seconds() / 3600,
                       y=s,
                       opacity=0.75,
                       name=d,
                       marker=dict(color=c)))

    # Build the figure description once and use it for both HTML and Figure.
    figure_spec = {
        "data":
        data,
        "layout":
        go.Layout(
            barmode='overlay',
            title=title or cum_yield_gb.title,
            xaxis=dict(title="Time (hours)"),
            yaxis=dict(title="Yield (gigabase)"),
        )
    }
    cum_yield_gb.html = plotly.offline.plot(figure_spec,
                                            output_type="div",
                                            show_link=False)
    cum_yield_gb.fig = go.Figure(figure_spec)
    cum_yield_gb.save()
    return [cum_yield_gb]
コード例 #17
0
def active_pores_over_time(df, path, palette=None, title=None):
    """Plot the number of distinct active channels over time per dataset.

    Each dataset gets a trace with the number of unique "channelIDs" values
    seen in every 10 minute bin.

    Args:
        df: DataFrame with "dataset", "channelIDs" and "start_time" columns.
        path: Prefix of the output file name.
        palette: Colours for the datasets; defaults to the plotly default
            colours repeated five times. Colours are reused cyclically if
            there are more datasets than colours.
        title: Title to show; falls back to the Plot's default title if falsy.

    Returns:
        The Plot (a single object, not a list).
    """
    from itertools import cycle

    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5
    dfs = check_valid_time_and_sort(df, "start_time").set_index("start_time")

    logging.info("NanoComp: Creating active pores plot using {} reads.".format(
        len(dfs)))
    active_pores = Plot(path=path + "NanoComp_ActivePoresOverTime.html",
                        title="Active pores over time")
    data = []
    # cycle() keeps a palette shorter than the number of datasets from
    # silently truncating the zip and dropping datasets.
    for sample, color in zip(df["dataset"].unique(), cycle(palette)):
        # 'min' replaces the 'T' minute alias, which pandas 2.2 deprecated.
        pores = dfs.loc[dfs["dataset"] == sample,
                        "channelIDs"].resample('10min').nunique()
        data.append(
            go.Scatter(x=pores.index.total_seconds() / 3600,
                       y=pores,
                       opacity=0.75,
                       name=sample,
                       marker=dict(color=color)))

    # Build the figure description once and use it for both HTML and Figure.
    figure_spec = {
        "data":
        data,
        "layout":
        go.Layout(
            barmode='overlay',
            title=title or active_pores.title,
            xaxis=dict(title="Time (hours)"),
            yaxis=dict(title="Active pores (per 10 minutes)"),
        )
    }
    active_pores.html = plotly.offline.plot(figure_spec,
                                            output_type="div",
                                            show_link=False)
    active_pores.fig = go.Figure(figure_spec)
    active_pores.save()
    return active_pores
コード例 #18
0
def dynamic_histogram(array, name, path, figformat, title=None, color="#4CB391"):
    """Create a plotly histogram of at most 10000 sampled values of array.

    The first character of name is lower-cased for the file name and default
    title; in the file name, spaces after the first character become
    underscores.

    Args:
        array: Values to plot; must support len() and .sample(n).
        name: Non-empty name of the plotted quantity; also used as x label.
        path: Prefix of the output file name.
        figformat: Passed unchanged to Plot.save.
        title: Title to show; falls back to the Plot's default title if falsy.
        color: Colour handed to plotly_histogram.

    Returns:
        The Plot carrying the HTML and figure produced by plotly_histogram.
    """
    first, rest = name[0].lower(), name[1:]
    dynhist = Plot(
        path=path + "Dynamic_Histogram_" + first + rest.replace(' ', '_') + ".html",
        title="Dynamic histogram of " + first + rest)

    # The y label tells the reader whether the data were subsampled.
    if len(array) > 10000:
        ylabel = "Downsampled number of reads"
    else:
        ylabel = "Number of reads"

    sample_size = min(len(array), 10000)
    html, fig = plotly_histogram(array=array.sample(sample_size),
                                 color=color,
                                 title=title or dynhist.title,
                                 xlabel=name,
                                 ylabel=ylabel)
    dynhist.html = html
    dynhist.fig = fig
    dynhist.save(figformat)
    return dynhist
コード例 #19
0
ファイル: timeplots.py プロジェクト: pphector/NanoPlot
def quality_over_time(dfs, path, figformat, title, plot_settings=None):
    """Create a seaborn violin plot of read quality per time bin.

    Args:
        dfs: DataFrame with "timebin" and "quals" columns.
        path: Prefix of the output file name.
        figformat: Extension appended to the output path; also handed to
            Plot.save as the format.
        title: Title to show; falls back to the Plot's default title if falsy.
        plot_settings: Optional mapping of extra keyword arguments for
            sns.set; None means no extra settings.

    Returns:
        The Plot carrying the figure.
    """
    time_qual = Plot(path=path + "TimeQualityViolinPlot." + figformat,
                     title="Violin plot of quality over time")
    # None default instead of a shared mutable {} default argument.
    sns.set(style="white", **(plot_settings or {}))
    ax = sns.violinplot(x="timebin",
                        y="quals",
                        data=dfs,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Basecall quality",
           title=title or time_qual.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_qual.fig = ax.get_figure()
    time_qual.save(format=figformat)
    plt.close("all")
    return time_qual
コード例 #20
0
def plot_over_time(dfs, path, title, figformat, color="#4CB391"):
    """Plot the number of reads, and if available active pores, per 10 minutes.

    The active pores plot is only made when dfs has a "channelIDs" column.

    Args:
        dfs: DataFrame with a "lengths" column (optionally "channelIDs") whose
            index can be resampled and supports total_seconds() (e.g. a
            TimedeltaIndex).
        path: Prefix of the output file names.
        title: Title to show; falls back to each Plot's default title if falsy.
        figformat: Passed unchanged to Plot.save.
        color: Colour of the markers.

    Returns:
        A list with the reads Plot and, if made, the active pores Plot.
    """

    def _scatter_and_save(plot, series, ylabel):
        """Scatter series against run time in hours and save it into plot."""
        fig = px.scatter(
            data_frame=None,
            x=series.index.total_seconds() / 3600,
            y=series)
        fig.update_traces(marker=dict(color=color))
        fig.update_layout(xaxis_title='Run time (hours)',
                          yaxis_title=ylabel,
                          title=title or plot.title,
                          title_x=0.5)
        plot.fig = fig
        plot.html = plot.fig.to_html(full_html=False, include_plotlyjs='cdn')
        plot.save(figformat)
        return plot

    num_reads = Plot(path=path + "NumberOfReads_Over_Time.html",
                     title="Number of reads over time")
    # 'min' replaces the 'T' minute alias, which pandas 2.2 deprecated.
    reads = dfs.loc[:, "lengths"].resample('10min').count()
    plots = [_scatter_and_save(num_reads, reads,
                               'Number of reads per 10 minutes')]

    if "channelIDs" in dfs:
        pores_over_time = Plot(path=path + "ActivePores_Over_Time.html",
                               title="Number of active pores over time")
        pores = dfs.loc[:, "channelIDs"].resample('10min').nunique()
        plots.append(_scatter_and_save(pores_over_time, pores,
                                       'Active pores per 10 minutes'))
    return plots
コード例 #21
0
ファイル: timeplots.py プロジェクト: iliasbukraa/nanoplotter
def sequencing_speed_over_time(dfs, path, figformat, title, plot_settings=None):
    """Create a seaborn violin plot of the sequencing speed per time bin.

    Speed is "lengths" divided by "duration"; rows whose duration equals 0
    are left out because they would give an infinite speed. A "timebin"
    column is added to dfs (in place) when it is missing.

    Args:
        dfs: DataFrame with "lengths" and "duration" columns.
        path: Prefix of the output file name.
        figformat: Extension appended to the output path; also handed to
            Plot.save as the format.
        title: Title to show; falls back to the Plot's default title if falsy.
        plot_settings: Optional mapping of extra keyword arguments for
            sns.set; None means no extra settings.

    Returns:
        The Plot carrying the figure.
    """
    time_duration = Plot(path=path + "TimeSequencingSpeed_ViolinPlot." + figformat,
                         title="Violin plot of sequencing speed over time")
    # None default instead of a shared mutable {} default argument.
    sns.set(style="white", **(plot_settings or {}))
    if "timebin" not in dfs:
        dfs['timebin'] = add_time_bins(dfs)
    # Skip zero durations: lengths / 0 is infinite and cannot be plotted.
    timed = dfs["duration"] != 0
    ax = sns.violinplot(x=dfs.loc[timed, "timebin"],
                        y=dfs.loc[timed, "lengths"] / dfs.loc[timed, "duration"],
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Sequencing speed (nucleotides/second)",
           title=title or time_duration.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_duration.fig = ax.get_figure()
    time_duration.save(format=figformat)
    plt.close("all")
    return time_duration
コード例 #22
0
def overlay_histogram_phred(df, path, figformat, palette=None):
    """Create an overlay histogram of phred-scaled percent identity.

    A "phredIdentity" column is added to df (in place), computed as
    -10 * log10(1 - percentIdentity / 100). A percentIdentity of 100 gives an
    infinite score; those values are set to 60 so they can be binned.

    Args:
        df: DataFrame with a "percentIdentity" column; it is modified in place.
        path: Prefix of the output file name.
        figformat: Passed to Plot.save as figformat.
        palette: Colours for the datasets; defaults to the plotly default
            colours repeated five times.

    Returns:
        The Plot carrying the figure and its HTML.
    """
    df["phredIdentity"] = -10 * np.log10(1 - (df["percentIdentity"] / 100))
    # Perfect alignments yield Inf, which a histogram cannot bin; cap at 60.
    df.loc[np.isinf(df["phredIdentity"]), "phredIdentity"] = 60

    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    hist_phred = Plot(path=path + "NanoComp_OverlayHistogram_PhredScore.html",
                      title="Histogram of Phred scores")

    hist_phred.html, hist_phred.fig = plot_overlay_histogram(df,
                                                             palette,
                                                             "phredIdentity",
                                                             hist_phred.title,
                                                             bins=20,
                                                             density=True)

    hist_phred.save(figformat=figformat)

    return hist_phred
コード例 #23
0
ファイル: compplots.py プロジェクト: RADnovogene/nanoplotter
def compare_sequencing_speed(df, figformat, path, title=None, palette=None):
    """Create a violin plot comparing sequencing speed over time per dataset.

    Speed is "lengths" divided by "duration"; rows whose duration equals 0
    are left out because they would give an infinite speed.

    Args:
        df: DataFrame with "start_time", "lengths", "duration" and "dataset"
            columns.
        figformat: Extension appended to the output path; also handed to
            Plot.save as the format.
        path: Prefix of the output file name.
        title: Title to show; falls back to the Plot's default title if falsy.
        palette: Palette handed to seaborn for the dataset hues.

    Returns:
        A one-element list holding the Plot.
    """
    logging.info(
        "Nanoplotter: creating comparison of sequencing speed over time.")
    seq_speed = Plot(path=path + "NanoComp_sequencing_speed_over_time." +
                     figformat,
                     title="Sequencing speed over time")
    dfs = check_valid_time_and_sort(df, "start_time")
    dfs['timebin'] = add_time_bins(dfs)
    # Skip zero durations: lengths / 0 is infinite and cannot be plotted.
    timed = dfs["duration"] != 0
    ax = sns.violinplot(x=dfs.loc[timed, "timebin"],
                        y=dfs.loc[timed, "lengths"] / dfs.loc[timed, "duration"],
                        hue=dfs.loc[timed, "dataset"],
                        palette=palette,
                        inner=None,
                        cut=0,
                        linewidth=0)
    # title and palette used to be accepted but silently ignored.
    ax.set(xlabel='Interval (hours)',
           ylabel="Sequencing speed (nucleotides/second)",
           title=title or seq_speed.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    seq_speed.fig = ax.get_figure()
    seq_speed.save(format=figformat)
    plt.close("all")
    return [seq_speed]
コード例 #24
0
def cumulative_yield(dfs, path, title, color, figformat):
    """Create cumulative yield plots in gigabases and in number of reads.

    Both series are binned per 10 minutes.

    Args:
        dfs: DataFrame with a "lengths" column whose index can be resampled
            and supports total_seconds() (e.g. a TimedeltaIndex).
        path: Prefix of the output file names.
        title: Title to show; falls back to each Plot's own default title if
            falsy.
        color: Colour of the markers.
        figformat: Passed unchanged to Plot.save.

    Returns:
        A list [gigabase plot, number-of-reads plot].
    """

    def _scatter_and_save(plot, series, ylabel):
        """Scatter series against run time in hours and save it into plot."""
        fig = px.scatter(
            x=series.index.total_seconds() / 3600,
            y=series)
        fig.update_traces(marker=dict(color=color))
        # Fall back to the title of the plot being drawn, not a sibling's.
        fig.update_layout(xaxis_title='Run time (hours)',
                          yaxis_title=ylabel,
                          title=title or plot.title,
                          title_x=0.5)
        plot.fig = fig
        plot.html = plot.fig.to_html(full_html=False, include_plotlyjs='cdn')
        plot.save(figformat)
        return plot

    cum_yield_gb = Plot(path=path + "CumulativeYieldPlot_Gigabases.html",
                        title="Cumulative yield")
    # 'min' replaces the 'T' minute alias, which pandas 2.2 deprecated.
    gigabases = dfs.loc[:, "lengths"].cumsum().resample('10min').max() / 1e9
    _scatter_and_save(cum_yield_gb, gigabases, 'Cumulative yield in gigabase')

    cum_yield_reads = Plot(path=path + "CumulativeYieldPlot_NumberOfReads.html",
                           title="Cumulative yield")
    reads = dfs.loc[:, "lengths"].resample('10min').count().cumsum()
    _scatter_and_save(cum_yield_reads, reads,
                      'Cumulative yield in number of reads')

    return [cum_yield_gb, cum_yield_reads]
コード例 #25
0
def yield_by_minimal_length_plot(array, name, path,
                                 title=None, color="#4CB391", figformat="png"):
    """Plot the cumulative yield obtained when keeping reads of at least a given length.

    Lengths are sorted in descending order and cumulatively summed, expressed
    in gigabases.

    Args:
        array: Read lengths; anything np.sort accepts.
        name: Not used by this function.
        path: Prefix of the output file name.
        title: Title to show; falls back to the Plot's default title if falsy.
        color: Colour of the scatter points.
        figformat: Extension of the output file; also handed to Plot.save as
            the format.

    Returns:
        The Plot carrying the figure.
    """
    longest_first = np.sort(array)[::-1]
    df = pd.DataFrame(data={"lengths": longest_first})
    df["cumyield_gb"] = df["lengths"].cumsum() / 10**9

    yield_by_length = Plot(path=path + "Yield_By_Length." + figformat,
                           title="Yield by length")

    ax = sns.regplot(x='lengths',
                     y="cumyield_gb",
                     data=df,
                     x_ci=None,
                     fit_reg=False,
                     color=color,
                     scatter_kws={"s": 3})
    ax.set(xlabel='Read length',
           ylabel='Cumulative yield for minimal length',
           title=title or yield_by_length.title)

    yield_by_length.fig = ax.get_figure()
    yield_by_length.save(format=figformat)
    plt.close("all")
    return yield_by_length
コード例 #26
0
ファイル: timeplots.py プロジェクト: pphector/NanoPlot
def plot_over_time(dfs, path, figformat, title, color):
    """
    Plot the number of reads, and if available the number of active pores,
    per 10 minute interval of the run.

    dfs: DataFrame with a "lengths" column and optionally a "channelIDs"
         column; its index must support resample() and total_seconds()
    path: prefix put in front of the output file names
    figformat: file extension, also handed to Plot.save as the format
    title: optional figure title, each Plot's default title is used otherwise
    color: colour of the scatter points

    Returns a list of Plot objects: always the reads-over-time plot, followed
    by the active-pores plot when "channelIDs" is a column of dfs.
    """
    def _save_scatter(plot, series, ylabel):
        """Draw one binned series as a scatter plot and store it in plot."""
        # regplot is only used as a scatter plot: no regression line, no CI.
        # The x values are the bin labels converted from seconds to hours.
        ax = sns.regplot(x=series.index.total_seconds() / 3600,
                         y=series,
                         x_ci=None,
                         fit_reg=False,
                         color=color,
                         scatter_kws={"s": 3})
        ax.set(xlabel='Run time (hours)',
               ylabel=ylabel,
               title=title or plot.title)
        plot.fig = ax.get_figure()
        plot.save(format=figformat)
        # No axes are passed to regplot, so close everything to keep the
        # next plot from being drawn on top of this one.
        plt.close("all")
        return plot

    # '10min' rather than the alias '10T': pandas deprecated 'T' in 2.2,
    # while 'min' is accepted by old and new versions alike.
    num_reads = Plot(path=path + "NumberOfReads_Over_Time." + figformat,
                     title="Number of reads over time")
    reads_per_bin = dfs.loc[:, "lengths"].resample('10min').count()
    plots = [_save_scatter(num_reads, reads_per_bin,
                           'Number of reads per 10 minutes')]

    if "channelIDs" in dfs:
        pores_over_time = Plot(path=path + "ActivePores_Over_Time." +
                               figformat,
                               title="Number of active pores over time")
        # A pore counts as active in a bin when at least one read came from it
        pores_per_bin = dfs.loc[:, "channelIDs"].resample('10min').nunique()
        plots.append(_save_scatter(pores_over_time, pores_per_bin,
                                   'Active pores per 10 minutes'))
    return plots
コード例 #27
0
def n50_barplot(df, path, settings, title=None):
    """
    Create a bar chart comparing the read length N50 of every dataset.

    The N50 is computed on the "aligned_lengths" column when df has one,
    and on the "lengths" column otherwise.

    df: DataFrame with a "dataset" column plus "aligned_lengths" or "lengths"
    path: prefix put in front of the output file name
    settings: passed unchanged to Plot.save
    title: optional figure title, the Plot's default title is used otherwise

    Returns a list holding the single Plot object, with its fig and html
    attributes filled in.
    """
    n50_bar = Plot(path=path + "NanoComp_N50.html",
                   title="Comparing read length N50")

    # The bars show an N50 in both cases, so the axis label has to say so
    # (the aligned case used to be labelled as a total yield in gigabases).
    if "aligned_lengths" in df:
        column, ylabel = "aligned_lengths", 'Aligned read length N50'
    else:
        column, ylabel = "lengths", 'Sequenced read length N50'

    datasets = df["dataset"].unique()
    n50s = [
        get_N50(np.sort(df.loc[df["dataset"] == dataset, column]))
        for dataset in datasets
    ]

    n50_bar.fig = go.Figure()

    # One trace per dataset so that each bar gets its own legend entry
    for dataset, n50 in zip(datasets, n50s):
        n50_bar.fig.add_trace(go.Bar(x=[dataset], y=[n50], name=dataset))

    n50_bar.fig.update_layout(
        title=title or n50_bar.title,
        title_x=0.5,
        yaxis_title=ylabel,
    )

    n50_bar.html = n50_bar.fig.to_html(full_html=False, include_plotlyjs='cdn')
    n50_bar.save(settings)
    return [n50_bar]
コード例 #28
0
def compare_sequencing_speed(df, path, settings, title=None):
    """
    Create a line plot of the median sequencing speed over time per dataset.

    The speed of a read is its length divided by its duration; reads with a
    duration of zero or less are left out. Per dataset the median speed is
    taken in 30 minute intervals.

    df: DataFrame with "dataset", "start_time", "lengths" and "duration"
        columns
    path: prefix put in front of the output file name
    settings: passed unchanged to Plot.save
    title: optional figure title, the Plot's default title is used otherwise

    Returns a list holding the single Plot object, with its fig and html
    attributes filled in.
    """
    from itertools import cycle

    logging.info(
        "NanoComp: creating comparison of sequencing speed over time.")
    seq_speed = Plot(path=path + "NanoComp_sequencing_speed_over_time.html",
                     title="Sequencing speed over time")

    # NOTE(review): check_valid_time_and_sort is defined elsewhere; presumably
    # it validates and sorts on start_time - confirm against that helper.
    dfs = check_valid_time_and_sort(df, "start_time").set_index("start_time")
    # Drop non-positive durations so the division below is well defined
    dfs = dfs.loc[dfs["duration"] > 0]

    # Cycle through the default colours without limit: zipping against a
    # fixed-length palette would silently drop datasets beyond its length.
    colors = cycle(plotly.colors.DEFAULT_PLOTLY_COLORS)

    traces = []
    for sample, color in zip(df["dataset"].unique(), colors):
        reads = dfs.loc[dfs["dataset"] == sample]
        # '30min' rather than the alias '30T': pandas deprecated 'T' in 2.2,
        # while 'min' is accepted by old and new versions alike.
        seqspeed = (reads["lengths"] /
                    reads["duration"]).resample('30min').median()
        traces.append(
            go.Scatter(x=seqspeed.index.total_seconds() / 3600,
                       y=seqspeed,
                       opacity=0.75,
                       name=sample,
                       mode='lines',
                       marker=dict(color=color)))

    seq_speed.fig = go.Figure({"data": traces})

    seq_speed.fig.update_layout(
        title=title or seq_speed.title,
        title_x=0.5,
        xaxis_title='Interval (hours)',
        yaxis_title="Sequencing speed (nucleotides/second)")

    seq_speed.html = seq_speed.fig.to_html(full_html=False,
                                           include_plotlyjs='cdn')
    seq_speed.save(settings)
    return [seq_speed]
コード例 #29
0
def overlay_histogram(df, path, settings, palette=None):
    """
    Build four overlaid read length histograms with plotly: plain and
    normalized, each on the raw and on the log transformed lengths.

    Every plot keeps both the html code and the figure, and is saved
    through Plot.save(settings).

    When no palette is given the 10 default plotly colors are used,
    repeated 5 times.

    Returns the four Plot objects as a list, in the order: histogram,
    normalized histogram, log histogram, normalized log histogram.
    """
    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    # (output file name, title, plotting function, extra keyword arguments)
    recipes = (
        ("NanoComp_OverlayHistogram.html",
         "Histogram of read lengths",
         plot_overlay_histogram,
         {"column": 'lengths'}),
        ("NanoComp_OverlayHistogram_Normalized.html",
         "Normalized histogram of read lengths",
         plot_overlay_histogram,
         {"column": 'lengths', "density": True}),
        ("NanoComp_OverlayLogHistogram.html",
         "Histogram of log transformed read lengths",
         plot_log_histogram,
         {}),
        ("NanoComp_OverlayLogHistogram_Normalized.html",
         "Normalized histogram of log transformed read lengths",
         plot_log_histogram,
         {"density": True}),
    )

    plots = []
    for filename, plot_title, plotter, extra in recipes:
        plot = Plot(path=path + filename, title=plot_title)
        plot.html, plot.fig = plotter(df, palette, title=plot.title, **extra)
        plot.save(settings)
        plots.append(plot)
    return plots
コード例 #30
0
ファイル: compplots.py プロジェクト: RADnovogene/nanoplotter
def overlay_histogram(df, path, palette=None):
    """
    Build four overlaid read length histograms with plotly: plain and
    probability-normalized, each on the raw and on the log transformed
    lengths.

    Every plot keeps both the html code and the figure, and is saved
    through Plot.save().

    When no palette is given the 10 default plotly colors are used,
    repeated 5 times.

    Returns the four Plot objects as a list, in the order: histogram,
    normalized histogram, log histogram, normalized log histogram.
    """
    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    def build(filename, plot_title, plotter, **options):
        """Create, fill and save one Plot using the given plotting function."""
        plot = Plot(path=path + filename, title=plot_title)
        plot.html, plot.fig = plotter(df, palette, title=plot.title, **options)
        plot.save()
        return plot

    # List items are evaluated top to bottom, so the plots are created and
    # saved in the same order in which they are returned.
    return [
        build("NanoComp_OverlayHistogram.html",
              "Histogram of read lengths",
              plot_overlay_histogram),
        build("NanoComp_OverlayHistogram_Normalized.html",
              "Normalized histogram of read lengths",
              plot_overlay_histogram,
              histnorm="probability"),
        build("NanoComp_OverlayLogHistogram.html",
              "Histogram of log transformed read lengths",
              plot_log_histogram),
        build("NanoComp_OverlayLogHistogram_Normalized.html",
              "Normalized histogram of log transformed read lengths",
              plot_log_histogram,
              histnorm="probability"),
    ]