コード例 #1
0
def output_barplot(df, path, settings, title=None):
    """Create barplots based on number of reads and total sum of nucleotides sequenced.

    Two figures are built and saved: one bar per dataset showing the number
    of rows (reads) of that dataset, and one bar per dataset showing the sum
    of its lengths.

    Args:
        df: DataFrame with a 'dataset' column and a 'lengths' column. When an
            'aligned_lengths' column is present it is summed instead of
            'lengths'.
        path: Prefix prepended to both output file names.
        settings: Passed unchanged to ``Plot.save``.
        title: Optional title overriding the default title of both figures.

    Returns:
        Tuple ``(read_count, throughput_bases)`` holding the two Plot objects.
    """
    logging.info(
        "NanoComp: Creating barplots for number of reads and total throughput."
    )
    read_count = Plot(path=path + "NanoComp_number_of_reads.html",
                      title="Comparing number of reads")

    read_count.fig = go.Figure()

    counts = df['dataset'].value_counts(sort=False).sort_index()
    for dataset, count in counts.items():
        read_count.fig.add_trace(go.Bar(x=[dataset], y=[count], name=dataset))

    read_count.fig.update_layout(
        title_text=title or read_count.title,
        title_x=0.5,
        yaxis_title="Number of reads",
    )

    read_count.html = read_count.fig.to_html(full_html=False,
                                             include_plotlyjs='cdn')
    read_count.save(settings)

    throughput_bases = Plot(path=path + "NanoComp_total_throughput.html",
                            title="Comparing throughput in bases")
    if "aligned_lengths" in df:
        length_column = 'aligned_lengths'
        ylabel = 'Total bases aligned'
    else:
        length_column = 'lengths'
        ylabel = 'Total bases sequenced'
    throughput = df.groupby('dataset')[length_column].sum()

    throughput_bases.fig = go.Figure()
    # groupby() returns its result sorted by dataset name while unique()
    # returns names in order of appearance, so each total is looked up by
    # label rather than paired positionally (which mislabelled the bars
    # whenever the datasets were not already in sorted order).
    for dataset in df["dataset"].unique():
        throughput_bases.fig.add_trace(
            go.Bar(x=[dataset], y=[throughput.loc[dataset]], name=dataset))

    throughput_bases.fig.update_layout(
        title=title or throughput_bases.title,
        title_x=0.5,
        yaxis_title=ylabel,
    )

    throughput_bases.html = throughput_bases.fig.to_html(
        full_html=False, include_plotlyjs='cdn')
    throughput_bases.save(settings)

    return read_count, throughput_bases
コード例 #2
0
def overlay_histogram_phred(df, path, settings, palette=None):
    """Create an overlay histogram of Phred-scaled reference identity.

    A 'phredIdentity' column is added to ``df`` (the caller's frame is
    modified in place), computed as ``-10 * log10(1 - percentIdentity / 100)``.
    Reads with a perfect alignment, and thus a percentIdentity of 100, would
    get a Phred score of infinity; these are set to 60, a very high Phred
    score, so they can still be binned.

    Args:
        df: DataFrame with a 'percentIdentity' column; also handed to
            ``plot_overlay_histogram``.
        path: Prefix prepended to the output file name.
        settings: Passed unchanged to ``Plot.save``.
        palette: Colors handed to ``plot_overlay_histogram``; defaults to the
            plotly default colors repeated five times.

    Returns:
        The Plot object holding the html and figure.
    """
    # log10(0) for 100% identity is expected here and handled right below,
    # so the divide-by-zero warning is silenced for this expression only.
    with np.errstate(divide="ignore"):
        df["phredIdentity"] = -10 * np.log10(1 - (df["percentIdentity"] / 100))
    # A single .loc assignment instead of chained indexing, which may write
    # to a temporary copy and leave the infinite values in place.
    df.loc[np.isinf(df["phredIdentity"]), "phredIdentity"] = 60

    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    hist_phred = Plot(path=path + "NanoComp_OverlayHistogram_PhredScore.html",
                      title="Histogram of Phred scores")

    hist_phred.html, hist_phred.fig = plot_overlay_histogram(df,
                                                             palette,
                                                             "phredIdentity",
                                                             hist_phred.title,
                                                             bins=20,
                                                             density=True)

    hist_phred.save(settings)

    return hist_phred
コード例 #3
0
ファイル: nanoplotter_main.py プロジェクト: pythseq/NanoPlot
def yield_by_minimal_length_plot(array,
                                 name,
                                 path,
                                 settings,
                                 title=None,
                                 color="#4CB391"):
    """Plot the cumulative yield obtained when keeping reads above a minimal length.

    The lengths are sorted from longest to shortest and cumulatively summed
    (in gigabases); a random sample of at most 10000 of those points is drawn
    as a scatter plot and saved.

    Args:
        array: Read lengths (anything ``np.sort`` accepts).
        name: Not used by this function.
        path: Prefix prepended to the output file name.
        settings: Passed unchanged to ``Plot.save``.
        title: Optional title overriding the default.
        color: Marker color.

    Returns:
        The Plot object holding the html and figure.
    """
    df = pd.DataFrame(data={"lengths": np.sort(array)[::-1]})
    df["cumyield_gb"] = df["lengths"].cumsum() / 10**9
    # Sample from df's own index: df is freshly built with a 0..n-1 index, so
    # labels taken from the input's index would select missing (NaN) or wrong
    # rows whenever the input had been filtered or re-ordered.
    idx = np.random.choice(df.index, min(10000, len(df)), replace=False)
    sampled = df.loc[idx]

    yield_by_length = Plot(path=path + "Yield_By_Length.html",
                           title="Yield by length")

    fig = px.scatter(df,
                     x=sampled["lengths"],
                     y=sampled["cumyield_gb"])
    fig.update_traces(marker=dict(color=color))
    fig.update_layout(xaxis_title='Read length',
                      yaxis_title='Cumulative yield for minimal length [Gb]',
                      title=title or yield_by_length.title,
                      title_x=0.5)

    yield_by_length.fig = fig
    yield_by_length.html = yield_by_length.fig.to_html(full_html=False,
                                                       include_plotlyjs='cdn')
    yield_by_length.save(settings)

    return yield_by_length
コード例 #4
0
def spatial_heatmap(array, path, colormap, figformat, title=None):
    """Taking channel information and creating post run channel activity plots.

    The occurrences of every value in ``array`` are counted and written into
    the layout template at the positions where the layout structure equals
    that value; the filled template is drawn as a heatmap and saved.

    Args:
        array: Channel identifier of every read (must expose ``.size``).
        path: Output path without extension; ".html" is appended.
        colormap: Colorscale handed to ``go.Heatmap``.
        figformat: Passed unchanged to ``Plot.save``.
        title: Optional title overriding the default.

    Returns:
        List containing the single Plot.
    """
    logging.info(
        "Nanoplotter: Creating heatmap of reads per channel using {} reads.".
        format(array.size))

    activity_map = Plot(path=path + ".html",
                        title="Number of reads generated per channel")

    layout = make_layout(maxval=np.amax(array))
    # Series.value_counts() replaces the deprecated top-level pd.value_counts.
    reads_per_channel = pd.Series(array).value_counts()

    for channel, n_reads in reads_per_channel.items():
        layout.template[np.where(layout.structure == channel)] = n_reads

    data = pd.DataFrame(layout.template,
                        index=layout.yticks,
                        columns=layout.xticks)

    fig = go.Figure(
        data=go.Heatmap(z=data.values.tolist(), colorscale=colormap))
    fig.update_layout(xaxis_title='Channel',
                      yaxis_title='Number of reads',
                      title=title or activity_map.title,
                      title_x=0.5)

    activity_map.fig = fig
    activity_map.html = activity_map.fig.to_html(full_html=False,
                                                 include_plotlyjs='cdn')
    activity_map.save(figformat)
    return [activity_map]
コード例 #5
0
ファイル: timeplots.py プロジェクト: wdecoster/NanoPlot
def quality_over_time(dfs, path, settings, title=None, color="#4CB391"):
    """Draw a violin plot of the 'quals' column per 'timebin' and save it.

    Args:
        dfs: DataFrame providing the 'quals' and 'timebin' columns.
        path: Prefix prepended to the output file name.
        settings: Passed unchanged to ``Plot.save``.
        title: Optional title overriding the default.
        color: Fill color of the violins.

    Returns:
        The Plot object holding the html and figure.
    """
    time_qual = Plot(path=path + "TimeQualityViolinPlot.html",
                     title="Violin plot of quality over time")

    violin = go.Violin(
        x=dfs["timebin"],
        y=dfs["quals"],
        points=False,
        spanmode="hard",
        line_color='black',
        line_width=1.5,
        fillcolor=color,
        opacity=0.8,
    )

    figure = go.Figure()
    figure.add_trace(violin)
    figure.update_layout(
        xaxis_title='Interval (hours)',
        yaxis_title='Basecall quality',
        title=title or time_qual.title,
        title_x=0.5,
    )
    figure.update_xaxes(tickangle=45)

    time_qual.fig = figure
    time_qual.html = time_qual.fig.to_html(full_html=False,
                                           include_plotlyjs='cdn')
    time_qual.save(settings)

    return time_qual
コード例 #6
0
ファイル: timeplots.py プロジェクト: wdecoster/NanoPlot
def sequencing_speed_over_time(dfs, path, title, settings, color="#4CB391"):
    """Draw a violin plot of lengths divided by duration per 'timebin' and save it.

    Rows with a duration of zero are left out before dividing.

    Args:
        dfs: DataFrame providing 'duration', 'lengths' and 'timebin' columns.
        path: Prefix prepended to the output file name.
        title: Title for the figure; a falsy value selects the default.
        settings: Passed unchanged to ``Plot.save``.
        color: Fill color of the violins.

    Returns:
        The Plot object holding the html and figure.
    """
    time_duration = Plot(path=path + "TimeSequencingSpeed_ViolinPlot.html",
                         title="Violin plot of sequencing speed over time")

    timed = dfs.loc[dfs['duration'] != 0]
    speed = timed["lengths"] / timed["duration"]

    violin = go.Violin(
        x=timed["timebin"],
        y=speed,
        points=False,
        spanmode="hard",
        line_color='black',
        line_width=1.5,
        fillcolor=color,
        opacity=0.8,
    )

    figure = go.Figure()
    figure.add_trace(violin)
    figure.update_layout(
        xaxis_title='Interval (hours)',
        yaxis_title='Sequencing speed (nucleotides/second)',
        title=title or time_duration.title,
        title_x=0.5,
    )
    figure.update_xaxes(tickangle=45)

    time_duration.fig = figure
    time_duration.html = time_duration.fig.to_html(full_html=False,
                                                   include_plotlyjs='cdn')
    time_duration.save(settings)

    return time_duration
コード例 #7
0
def compare_cumulative_yields(df, path, palette=None, title=None):
    """Plot the cumulative yield over time of every dataset in one figure.

    For each dataset the 'lengths' column is cumulatively summed, binned per
    10 minutes keeping the maximum of every bin, and divided by 1e9. The last
    value of every line is added to the figure as a text annotation.

    Args:
        df: DataFrame with 'dataset', 'lengths' and 'start_time' columns.
        path: Prefix prepended to the output file name.
        palette: Colors, paired with the datasets in order; defaults to the
            plotly default colors repeated five times.
        title: Optional title overriding the default.

    Returns:
        List containing the single Plot.
    """
    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5
    # NOTE(review): the index is used with .total_seconds() below, so
    # 'start_time' is presumably a timedelta column after this helper - confirm.
    dfs = check_valid_time_and_sort(df, "start_time").set_index("start_time")

    logging.info(
        "NanoComp: Creating cumulative yield plots using {} reads.".format(
            len(dfs)))
    cum_yield_gb = Plot(path=path +
                        "NanoComp_CumulativeYieldPlot_Gigabases.html",
                        title="Cumulative yield")
    data = []
    annotations = []
    for sample, color in zip(df["dataset"].unique(), palette):
        # '10min' is the non-deprecated spelling of the '10T' offset alias.
        cumsum = dfs.loc[dfs["dataset"] == sample,
                         "lengths"].cumsum().resample('10min').max() / 1e9
        # Positional access must go through .iloc: a bare integer key on a
        # non-integer index is deprecated label/position guessing.
        final_yield = cumsum.iloc[-1]
        data.append(
            go.Scatter(x=cumsum.index.total_seconds() / 3600,
                       y=cumsum,
                       opacity=0.75,
                       name=sample,
                       marker=dict(color=color)))
        annotations.append(
            dict(xref='paper',
                 x=0.99,
                 y=final_yield,
                 xanchor='left',
                 yanchor='middle',
                 text='{}Gb'.format(round(final_yield)),
                 showarrow=False))

    # One figure description shared by the html div and the Figure object,
    # so the two can never drift apart.
    figure = {
        "data":
        data,
        "layout":
        go.Layout(barmode='overlay',
                  title=title or cum_yield_gb.title,
                  xaxis=dict(title="Time (hours)"),
                  yaxis=dict(title="Yield (gigabase)"),
                  annotations=annotations)
    }

    cum_yield_gb.html = plotly.offline.plot(figure,
                                            output_type="div",
                                            show_link=False)
    cum_yield_gb.fig = go.Figure(figure)
    cum_yield_gb.save()
    return [cum_yield_gb]
コード例 #8
0
def plot_over_time(dfs, path, title, figformat, color="#4CB391"):
    """Plot the number of reads, and if available active pores, per 10 minutes.

    The number of reads is the count of the 'lengths' column per 10-minute
    bin. When a 'channelIDs' column is present a second plot shows the number
    of distinct channel IDs per 10-minute bin.

    Args:
        dfs: DataFrame whose index supports ``resample`` and
            ``total_seconds`` (presumably a TimedeltaIndex - confirm against
            the caller), with a 'lengths' column and optionally 'channelIDs'.
        path: Prefix prepended to the output file names.
        title: Title for the figures; a falsy value selects the defaults.
        figformat: Passed unchanged to ``Plot.save``.
        color: Marker color.

    Returns:
        List with the reads plot and, when 'channelIDs' is available, the
        active pores plot.
    """
    num_reads = Plot(path=path + "NumberOfReads_Over_Time.html",
                     title="Number of reads over time")
    # '10min' is the non-deprecated spelling of the '10T' offset alias.
    s = dfs.loc[:, "lengths"].resample('10min').count()

    fig = px.scatter(
        data_frame=None,
        x=s.index.total_seconds() / 3600,
        y=s)
    fig.update_traces(marker=dict(color=color))

    fig.update_layout(xaxis_title='Run time (hours)',
                      yaxis_title='Number of reads per 10 minutes',
                      title=title or num_reads.title,
                      title_x=0.5)

    num_reads.fig = fig
    num_reads.html = num_reads.fig.to_html(full_html=False, include_plotlyjs='cdn')
    num_reads.save(figformat)

    plots = [num_reads]

    if "channelIDs" in dfs:
        pores_over_time = Plot(path=path + "ActivePores_Over_Time.html",
                               title="Number of active pores over time")
        s = dfs.loc[:, "channelIDs"].resample('10min').nunique()

        fig = px.scatter(
            data_frame=None,
            x=s.index.total_seconds() / 3600,
            y=s)
        fig.update_traces(marker=dict(color=color))

        fig.update_layout(xaxis_title='Run time (hours)',
                          yaxis_title='Active pores per 10 minutes',
                          title=title or pores_over_time.title,
                          title_x=0.5)

        pores_over_time.fig = fig
        pores_over_time.html = pores_over_time.fig.to_html(full_html=False, include_plotlyjs='cdn')
        pores_over_time.save(figformat)

        plots.append(pores_over_time)
    return plots
コード例 #9
0
def dynamic_histogram(array, name, path, title=None, color="#4CB391"):
    """
    Create a plotly histogram of a random sample of at most 10000 values.

    The output file name is derived from ``name`` with spaces replaced by
    underscores. The Plot is saved and returned.
    """
    file_name = "Dynamic_Histogram_{}.html".format(name.replace(' ', '_'))
    plot_title = title or "Dynamic histogram of {}".format(name)
    dynhist = Plot(path=path + file_name, title=plot_title)

    sample_size = min(len(array), 10000)
    html, fig = plotly_histogram(array=array.sample(sample_size),
                                 color=color,
                                 title=dynhist.title)
    dynhist.html, dynhist.fig = html, fig
    dynhist.save()
    return dynhist
コード例 #10
0
def cumulative_yield(dfs, path, title, color, figformat):
    """Plot the cumulative yield over time, in gigabases and in number of reads.

    The first plot shows the cumulative sum of 'lengths' per 10-minute bin
    (maximum of each bin) divided by 1e9; the second shows the cumulative
    count of 'lengths' per 10-minute bin.

    Args:
        dfs: DataFrame whose index supports ``resample`` and
            ``total_seconds`` (presumably a TimedeltaIndex - confirm against
            the caller), with a 'lengths' column.
        path: Prefix prepended to the output file names.
        title: Title for both figures; a falsy value selects the defaults.
        color: Marker color.
        figformat: Passed unchanged to ``Plot.save``.

    Returns:
        List ``[cum_yield_gb, cum_yield_reads]`` of the two Plot objects.
    """
    cum_yield_gb = Plot(path=path + "CumulativeYieldPlot_Gigabases.html",
                        title="Cumulative yield")

    # '10min' is the non-deprecated spelling of the '10T' offset alias.
    s = dfs.loc[:, "lengths"].cumsum().resample('10min').max() / 1e9

    fig = px.scatter(
        x=s.index.total_seconds() / 3600,
        y=s)
    fig.update_traces(marker=dict(color=color))

    fig.update_layout(xaxis_title='Run time (hours)',
                      yaxis_title='Cumulative yield in gigabase',
                      title=title or cum_yield_gb.title,
                      title_x=0.5)

    cum_yield_gb.fig = fig
    cum_yield_gb.html = cum_yield_gb.fig.to_html(full_html=False, include_plotlyjs='cdn')
    cum_yield_gb.save(figformat)

    cum_yield_reads = Plot(path=path + "CumulativeYieldPlot_NumberOfReads.html",
                           title="Cumulative yield")

    s = dfs.loc[:, "lengths"].resample('10min').count().cumsum()

    fig = px.scatter(
        x=s.index.total_seconds() / 3600,
        y=s)
    fig.update_traces(marker=dict(color=color))

    # The fallback title comes from this plot's own Plot object rather than
    # from the gigabase plot, so the two stay independent.
    fig.update_layout(xaxis_title='Run time (hours)',
                      yaxis_title='Cumulative yield in number of reads',
                      title=title or cum_yield_reads.title,
                      title_x=0.5)

    cum_yield_reads.fig = fig
    cum_yield_reads.html = cum_yield_reads.fig.to_html(full_html=False, include_plotlyjs='cdn')
    cum_yield_reads.save(figformat)

    return [cum_yield_gb, cum_yield_reads]
コード例 #11
0
ファイル: timeplots.py プロジェクト: wdecoster/NanoPlot
def length_over_time(dfs,
                     path,
                     title,
                     settings,
                     log_length=False,
                     color="#4CB391"):
    """Draw a violin plot of read lengths (optionally log lengths) per 'timebin'.

    When ``dfs`` has a 'length_filter' column only the rows where it is true
    are plotted. With ``log_length`` the 'log_lengths' column is plotted and
    the y axis is labelled with powers of ten.

    Args:
        dfs: DataFrame providing 'timebin', 'lengths' and, when
            ``log_length`` is set, 'log_lengths'.
        path: Prefix prepended to the output file name.
        title: Title for the figure; a falsy value selects the default.
        settings: Passed unchanged to ``Plot.save``.
        log_length: Plot log transformed lengths instead of raw lengths.
        color: Fill color of the violins.

    Returns:
        The Plot object holding the html and figure.
    """
    if log_length:
        file_name = "TimeLogLengthViolinPlot.html"
        default_title = "Violin plot of log read lengths over time"
        length_column = "log_lengths"
    else:
        file_name = "TimeLengthViolinPlot.html"
        default_title = "Violin plot of read lengths over time"
        length_column = "lengths"
    time_length = Plot(path=path + file_name, title=default_title)

    # 'length_filter' is produced by NanoPlot filtering of too long reads
    shown = dfs[dfs["length_filter"]] if "length_filter" in dfs else dfs

    violin = go.Violin(
        x=shown["timebin"],
        y=shown[length_column],
        points=False,
        spanmode="hard",
        line_color='black',
        line_width=1.5,
        fillcolor=color,
        opacity=0.8,
    )

    figure = go.Figure()
    figure.add_trace(violin)
    figure.update_layout(
        xaxis_title='Interval (hours)',
        yaxis_title='Read length',
        title=title or time_length.title,
        title_x=0.5,
    )

    if log_length:
        # Powers of ten up to one decade above the longest (unfiltered) read.
        upper = 10 * np.amax(dfs["lengths"])
        ticks = [10**exponent for exponent in range(10)
                 if not 10**exponent > upper]
        log_axis = dict(tickmode='array',
                        tickvals=np.log10(ticks),
                        ticktext=ticks)
        figure.update_layout(yaxis=log_axis)

    # NOTE(review): this rotates the y-axis tick labels; confirm that the
    # x-axis (the time bins) was not the intended target.
    figure.update_yaxes(tickangle=45)

    time_length.fig = figure
    time_length.html = time_length.fig.to_html(full_html=False,
                                               include_plotlyjs='cdn')
    time_length.save(settings)

    return time_length
コード例 #12
0
def overlay_histogram_identity(df, path, settings, palette=None):
    """Create and save a density overlay histogram of the 'percentIdentity' column.

    Without a palette the plotly default colors, repeated five times, are
    used. Returns the Plot object.
    """
    colors = plotly.colors.DEFAULT_PLOTLY_COLORS * 5 if palette is None else palette

    hist_pid = Plot(path=path + "NanoComp_OverlayHistogram_Identity.html",
                    title="Histogram of percent reference identity")
    html, fig = plot_overlay_histogram(df,
                                       colors,
                                       "percentIdentity",
                                       hist_pid.title,
                                       density=True)
    hist_pid.html, hist_pid.fig = html, fig
    hist_pid.save(settings)

    return hist_pid
コード例 #13
0
def overlay_histogram(df, path, settings, palette=None):
    """
    Create four overlay histograms of read lengths and save each of them.

    In order: plain, normalized (density), log transformed, and normalized
    log transformed. Returns the four Plot objects as a list in that order.

    The default palette only has 10 colors, which get recycled up to 5 times.
    """
    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    # (file name, title, plotting function, extra keyword arguments)
    recipes = [
        ("NanoComp_OverlayHistogram.html",
         "Histogram of read lengths",
         plot_overlay_histogram,
         {"column": 'lengths'}),
        ("NanoComp_OverlayHistogram_Normalized.html",
         "Normalized histogram of read lengths",
         plot_overlay_histogram,
         {"column": 'lengths', "density": True}),
        ("NanoComp_OverlayLogHistogram.html",
         "Histogram of log transformed read lengths",
         plot_log_histogram,
         {}),
        ("NanoComp_OverlayLogHistogram_Normalized.html",
         "Normalized histogram of log transformed read lengths",
         plot_log_histogram,
         {"density": True}),
    ]

    plots = []
    for file_name, plot_title, draw, extra in recipes:
        plot = Plot(path=path + file_name, title=plot_title)
        plot.html, plot.fig = draw(df, palette, title=plot.title, **extra)
        plot.save(settings)
        plots.append(plot)

    return plots
コード例 #14
0
ファイル: compplots.py プロジェクト: RADnovogene/nanoplotter
def compare_cumulative_yields(df, path, palette=None, title=None):
    """Plot the cumulative yield over time of every dataset in one figure.

    For each dataset the 'lengths' column is cumulatively summed, binned per
    10 minutes keeping the maximum of every bin, and divided by 1e9.

    Args:
        df: DataFrame with 'dataset', 'lengths' and 'start_time' columns.
        path: Prefix prepended to the output file name.
        palette: Colors, paired with the datasets in order; defaults to the
            plotly default colors repeated five times.
        title: Optional title overriding the default.

    Returns:
        List containing the single Plot.
    """
    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5
    # NOTE(review): the index is used with .total_seconds() below, so
    # 'start_time' is presumably a timedelta column after this helper - confirm.
    dfs = check_valid_time_and_sort(df, "start_time").set_index("start_time")

    logging.info(
        "Nanoplotter: Creating cumulative yield plots using {} reads.".format(
            len(dfs)))
    cum_yield_gb = Plot(path=path +
                        "NanoComp_CumulativeYieldPlot_Gigabases.html",
                        title="Cumulative yield")
    data = []
    for d, c in zip(df["dataset"].unique(), palette):
        # '10min' is the non-deprecated spelling of the '10T' offset alias.
        s = dfs.loc[dfs["dataset"] == d,
                    "lengths"].cumsum().resample('10min').max() / 1e9
        data.append(
            go.Scatter(x=s.index.total_seconds() / 3600,
                       y=s,
                       opacity=0.75,
                       name=d,
                       marker=dict(color=c)))

    # One figure description shared by the html div and the Figure object,
    # so the two can never drift apart.
    figure = {
        "data":
        data,
        "layout":
        go.Layout(
            barmode='overlay',
            title=title or cum_yield_gb.title,
            xaxis=dict(title="Time (hours)"),
            yaxis=dict(title="Yield (gigabase)"),
        )
    }

    cum_yield_gb.html = plotly.offline.plot(figure,
                                            output_type="div",
                                            show_link=False)
    cum_yield_gb.fig = go.Figure(figure)
    cum_yield_gb.save()
    return [cum_yield_gb]
コード例 #15
0
ファイル: compplots.py プロジェクト: RADnovogene/nanoplotter
def overlay_histogram(df, path, palette=None):
    """
    Create four overlay histograms of read lengths and save each of them.

    In order: plain, normalized (histnorm "probability"), log transformed,
    and normalized log transformed. Returns the four Plot objects as a list
    in that order.

    The default palette only has 10 colors, which get recycled up to 5 times.
    """
    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    # (file name, title, plotting function, extra keyword arguments)
    recipes = [
        ("NanoComp_OverlayHistogram.html",
         "Histogram of read lengths",
         plot_overlay_histogram,
         {}),
        ("NanoComp_OverlayHistogram_Normalized.html",
         "Normalized histogram of read lengths",
         plot_overlay_histogram,
         {"histnorm": "probability"}),
        ("NanoComp_OverlayLogHistogram.html",
         "Histogram of log transformed read lengths",
         plot_log_histogram,
         {}),
        ("NanoComp_OverlayLogHistogram_Normalized.html",
         "Normalized histogram of log transformed read lengths",
         plot_log_histogram,
         {"histnorm": "probability"}),
    ]

    plots = []
    for file_name, plot_title, draw, extra in recipes:
        plot = Plot(path=path + file_name, title=plot_title)
        plot.html, plot.fig = draw(df, palette, title=plot.title, **extra)
        plot.save()
        plots.append(plot)

    return plots
コード例 #16
0
def active_pores_over_time(df, path, palette=None, title=None):
    """Plot the number of active pores over time of every dataset in one figure.

    For each dataset the number of distinct values in 'channelIDs' is counted
    per 10-minute bin.

    Args:
        df: DataFrame with 'dataset', 'channelIDs' and 'start_time' columns.
        path: Prefix prepended to the output file name.
        palette: Colors, paired with the datasets in order; defaults to the
            plotly default colors repeated five times.
        title: Optional title overriding the default.

    Returns:
        The Plot object (not wrapped in a list).
    """
    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5
    # NOTE(review): the index is used with .total_seconds() below, so
    # 'start_time' is presumably a timedelta column after this helper - confirm.
    dfs = check_valid_time_and_sort(df, "start_time").set_index("start_time")

    logging.info("NanoComp: Creating active pores plot using {} reads.".format(
        len(dfs)))
    active_pores = Plot(path=path + "NanoComp_ActivePoresOverTime.html",
                        title="Active pores over time")
    data = []
    for sample, color in zip(df["dataset"].unique(), palette):
        # '10min' is the non-deprecated spelling of the '10T' offset alias.
        pores = dfs.loc[dfs["dataset"] == sample,
                        "channelIDs"].resample('10min').nunique()
        data.append(
            go.Scatter(x=pores.index.total_seconds() / 3600,
                       y=pores,
                       opacity=0.75,
                       name=sample,
                       marker=dict(color=color)))

    # One figure description shared by the html div and the Figure object,
    # so the two can never drift apart.
    figure = {
        "data":
        data,
        "layout":
        go.Layout(
            barmode='overlay',
            title=title or active_pores.title,
            xaxis=dict(title="Time (hours)"),
            yaxis=dict(title="Active pores (per 10 minutes)"),
        )
    }

    active_pores.html = plotly.offline.plot(figure,
                                            output_type="div",
                                            show_link=False)
    active_pores.fig = go.Figure(figure)
    active_pores.save()
    return active_pores
コード例 #17
0
def dynamic_histogram(array, name, path, figformat, title=None, color="#4CB391"):
    """
    Create a plotly histogram of a random sample of at most 10000 values.

    The first character of ``name`` is lowercased for the file name and the
    default title; ``name`` itself is used as the x axis label. The y axis
    label indicates when the data was downsampled. The Plot is saved with
    ``figformat`` and returned.
    """
    first, rest = name[0].lower(), name[1:]
    file_name = f"Dynamic_Histogram_{first + rest.replace(' ', '_')}.html"
    dynhist = Plot(path=path + file_name,
                   title="Dynamic histogram of {}".format(first + rest))

    if len(array) <= 10000:
        ylabel = "Number of reads"
    else:
        ylabel = "Downsampled number of reads"

    sample_size = min(len(array), 10000)
    html, fig = plotly_histogram(array=array.sample(sample_size),
                                 color=color,
                                 title=title or dynhist.title,
                                 xlabel=name,
                                 ylabel=ylabel)
    dynhist.html, dynhist.fig = html, fig
    dynhist.save(figformat)
    return dynhist
コード例 #18
0
def overlay_histogram_phred(df, path, figformat, palette=None):
    """Create an overlay histogram of Phred-scaled reference identity.

    A 'phredIdentity' column is added to ``df`` (the caller's frame is
    modified in place), computed as ``-10 * log10(1 - percentIdentity / 100)``.
    Reads with a percentIdentity of 100 would get a Phred score of infinity,
    which cannot be binned; these are set to 60, a very high Phred score.

    Args:
        df: DataFrame with a 'percentIdentity' column; also handed to
            ``plot_overlay_histogram``.
        path: Prefix prepended to the output file name.
        figformat: Passed to ``Plot.save`` as ``figformat``.
        palette: Colors handed to ``plot_overlay_histogram``; defaults to the
            plotly default colors repeated five times.

    Returns:
        The Plot object holding the html and figure.
    """
    # log10(0) for 100% identity is expected here and handled right below,
    # so the divide-by-zero warning is silenced for this expression only.
    with np.errstate(divide="ignore"):
        df["phredIdentity"] = -10 * np.log10(1 - (df["percentIdentity"] / 100))
    # Cap perfect alignments so the histogram input stays finite.
    df.loc[np.isinf(df["phredIdentity"]), "phredIdentity"] = 60

    if palette is None:
        palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    hist_phred = Plot(path=path + "NanoComp_OverlayHistogram_PhredScore.html",
                      title="Histogram of Phred scores")

    hist_phred.html, hist_phred.fig = plot_overlay_histogram(df,
                                                             palette,
                                                             "phredIdentity",
                                                             hist_phred.title,
                                                             bins=20,
                                                             density=True)

    hist_phred.save(figformat=figformat)

    return hist_phred
コード例 #19
0
def n50_barplot(df, path, settings, title=None):
    '''
    Create and save a bar chart of the read length N50 of every dataset.

    The N50 is computed with ``get_N50`` on the sorted 'aligned_lengths' of a
    dataset when that column is present, and on its sorted 'lengths'
    otherwise.

    Args:
        df: DataFrame with a 'dataset' column and a 'lengths' and/or
            'aligned_lengths' column.
        path: Prefix prepended to the output file name.
        settings: Passed unchanged to ``Plot.save``.
        title: Optional title overriding the default.

    Returns:
        List containing the single Plot.
    '''
    n50_bar = Plot(path=path + "NanoComp_N50.html",
                   title="Comparing read length N50")
    if "aligned_lengths" in df:
        length_column = "aligned_lengths"
        # The bars show an N50, not a yield, so the axis is labelled as such.
        ylabel = 'Aligned read length N50'
    else:
        length_column = "lengths"
        ylabel = 'Sequenced read length N50'

    datasets = df["dataset"].unique()
    n50s = [
        get_N50(np.sort(df.loc[df["dataset"] == d, length_column]))
        for d in datasets
    ]

    n50_bar.fig = go.Figure()

    for dataset, n50 in zip(datasets, n50s):
        n50_bar.fig.add_trace(go.Bar(x=[dataset], y=[n50], name=dataset))

    n50_bar.fig.update_layout(
        title=title or n50_bar.title,
        title_x=0.5,
        yaxis_title=ylabel,
    )

    n50_bar.html = n50_bar.fig.to_html(full_html=False, include_plotlyjs='cdn')
    n50_bar.save(settings)
    return [n50_bar]
コード例 #20
0
def compare_sequencing_speed(df, path, settings, title=None):
    """Plot the median sequencing speed over time of every dataset as lines.

    Rows with a duration that is not positive are dropped. For each dataset
    'lengths' divided by 'duration' is binned per 30 minutes and the median
    of every bin is drawn.

    Args:
        df: DataFrame with 'dataset', 'lengths', 'duration' and 'start_time'
            columns.
        path: Prefix prepended to the output file name.
        settings: Passed unchanged to ``Plot.save``.
        title: Optional title overriding the default.

    Returns:
        List containing the single Plot.
    """
    logging.info(
        "NanoComp: creating comparison of sequencing speed over time.")
    seq_speed = Plot(path=path + "NanoComp_sequencing_speed_over_time.html",
                     title="Sequencing speed over time")

    # NOTE(review): the index is used with .total_seconds() below, so
    # 'start_time' is presumably a timedelta column after this helper - confirm.
    dfs = check_valid_time_and_sort(df, "start_time").set_index("start_time")
    dfs = dfs.loc[dfs["duration"] > 0]

    palette = plotly.colors.DEFAULT_PLOTLY_COLORS * 5

    data = []
    for sample, color in zip(df["dataset"].unique(), palette):
        reads = dfs.loc[dfs["dataset"] == sample]
        # '30min' is the non-deprecated spelling of the '30T' offset alias.
        seqspeed = (reads["lengths"] /
                    reads["duration"]).resample('30min').median()
        data.append(
            go.Scatter(x=seqspeed.index.total_seconds() / 3600,
                       y=seqspeed,
                       opacity=0.75,
                       name=sample,
                       mode='lines',
                       marker=dict(color=color)))

    seq_speed.fig = go.Figure({"data": data})

    seq_speed.fig.update_layout(
        title=title or seq_speed.title,
        title_x=0.5,
        xaxis_title='Interval (hours)',
        yaxis_title="Sequencing speed (nucleotides/second)")

    seq_speed.html = seq_speed.fig.to_html(full_html=False,
                                           include_plotlyjs='cdn')
    seq_speed.save(settings)
    return [seq_speed]
コード例 #21
0
ファイル: nanoplotter_main.py プロジェクト: pythseq/NanoPlot
def scatter(x,
            y,
            legacy,
            names,
            path,
            plots,
            color,
            colormap,
            settings,
            stat=None,
            log=False,
            minvalx=0,
            minvaly=0,
            title=None,
            xmax=None,
            ymax=None):
    """Create bivariate plots of x against y using plotly.

    Depending on the flags in ``plots``:
    - "dot": scatter plot with a histogram on both axes
    - "kde": 2D density plot with a histogram on both axes
    When any value in ``legacy`` equals 1 the plots of ``scatter_legacy``
    are appended as well.

    At most 10000 randomly chosen points are plotted. Nothing is plotted and
    an empty list is returned when ``contains_variance`` rejects the data.
    With ``log`` the x axis is labelled with powers of ten.

    Returns:
        List of the Plot objects that were made.
    """
    logging.info(
        f"NanoPlot: Creating {names[0]} vs {names[1]} plots using {x.size} reads."
    )
    if not contains_variance([x, y], names):
        return []

    idx = np.random.choice(x.index, min(10000, len(x)), replace=False)
    maxvalx = xmax or np.amax(x[idx])
    maxvaly = ymax or np.amax(y[idx])

    def log_xaxis():
        # Tick at every power of ten up to one decade above the largest
        # (log transformed) x value.
        ticks = [
            10**exponent for exponent in range(10)
            if not 10**exponent > 10 * (10**maxvalx)
        ]
        return dict(tickmode='array',
                    tickvals=np.log10(ticks),
                    ticktext=ticks,
                    tickangle=45)

    made = []

    if plots["dot"]:
        dot_suffix = "_loglength_dot.html" if log else "_dot.html"
        dot_title = f"{names[0]} vs {names[1]} plot using dots"
        if log:
            dot_title += " after log transformation of read lengths"
        dot_plot = Plot(path=path + dot_suffix, title=dot_title)

        dot_fig = px.scatter(x=x[idx],
                             y=y[idx],
                             marginal_x="histogram",
                             marginal_y="histogram",
                             range_x=[minvalx, maxvalx],
                             range_y=[minvaly, maxvaly])
        dot_fig.update_traces(marker=dict(color=color))
        dot_fig.update_yaxes(rangemode="tozero")
        dot_fig.update_xaxes(rangemode="tozero")
        dot_fig.update_layout(xaxis_title=names[0],
                              yaxis_title=names[1],
                              title=title or dot_plot.title,
                              title_x=0.5)
        if log:
            dot_fig.update_layout(xaxis=log_xaxis())

        dot_plot.fig = dot_fig
        dot_plot.html = dot_plot.fig.to_html(full_html=False,
                                             include_plotlyjs='cdn')
        dot_plot.save(settings)
        made.append(dot_plot)

    if plots["kde"]:
        kde_suffix = "_loglength_kde.html" if log else "_kde.html"
        kde_plot = Plot(path=path + kde_suffix,
                        title=f"{names[0]} vs {names[1]} kde plot")

        rgb = hex_to_rgb_scale_0_1(color)
        kde_fig = ff.create_2d_density(x[idx],
                                       y[idx],
                                       point_size=3,
                                       hist_color=rgb,
                                       point_color=rgb,
                                       colorscale=colormap)
        kde_fig.update_layout(xaxis_title=names[0],
                              yaxis_title=names[1],
                              title=title or kde_plot.title,
                              title_x=0.5,
                              xaxis=dict(tickangle=45))
        if log:
            kde_fig.update_layout(xaxis=log_xaxis())

        kde_plot.fig = kde_fig
        kde_plot.html = kde_plot.fig.to_html(full_html=False,
                                             include_plotlyjs='cdn')
        kde_plot.save(settings)
        made.append(kde_plot)

    if 1 in legacy.values():
        # The settings handed to the legacy plots are re-read from
        # utils.get_args() rather than taken from the argument.
        settings, args = utils.get_args()
        legacy_plots = scatter_legacy(x=x[idx],
                                      y=y[idx],
                                      names=names,
                                      path=path,
                                      plots=legacy,
                                      color=color,
                                      settings=settings,
                                      stat=stat,
                                      log=log,
                                      minvalx=minvalx,
                                      minvaly=minvaly,
                                      title=title)
        made += legacy_plots
    return made
コード例 #22
0
ファイル: nanoplotter_main.py プロジェクト: pythseq/NanoPlot
def length_plots(array,
                 name,
                 path,
                 settings,
                 title=None,
                 n50=None,
                 color="#4CB391"):
    """Create histograms of normal and log transformed read lengths.

    For a length-weighted and a non weighted variant, two bar charts are
    built and saved: one of the raw read lengths and one of the log10
    transformed read lengths. The plot returned by
    ``yield_by_minimal_length_plot`` is appended after those four.

    Args:
        array: read lengths; used with ``np.amax``, ``np.log10``,
            ``np.histogram`` and ``.size`` (presumably a numpy array).
        name: label used in the log message and, with spaces removed, in the
            output file names.
        path: prefix prepended to every output file name.
        settings: passed through to ``Plot.save`` and
            ``yield_by_minimal_length_plot``.
        title: optional title overriding each plot's own default title.
        n50: optional read length N50; when truthy a vertical line and an
            'N50' annotation are drawn on every histogram.
        color: bar color of the histograms.

    Returns:
        List with, per histogram type, the normal and the log transformed
        histogram plot, followed by the yield-by-minimal-length plot.
    """
    logging.info("NanoPlot:  Creating length plots for {}.".format(name))
    maxvalx = np.amax(array)
    if n50:
        logging.info(
            "NanoPlot: Using {} reads with read length N50 of {}bp and maximum of {}bp."
            .format(array.size, n50, maxvalx))
    else:
        logging.info(
            f"NanoPlot:  Using {array.size} reads maximum of {maxvalx}bp.")

    # These values do not depend on the histogram type, compute them once.
    n_bins = max(round(int(maxvalx) / 500), 10)
    log_lengths = np.log10(array)
    ticks = [10**i for i in range(10) if not 10**i > 10 * maxvalx]

    plots = []

    # Weighting a read-length histogram by the lengths themselves makes each
    # bar the sum of the lengths in that bin, i.e. a number of bases.
    hist_types = [{
        'weight': array,
        'name': 'Weighted',
        'ylabel': 'Number of bases'
    }, {
        'weight': None,
        'name': 'Non weighted',
        'ylabel': 'Number of reads'
    }]

    for h_type in hist_types:
        histogram = Plot(path=path + h_type["name"].replace(" ", "_") +
                         "Histogram" + name.replace(' ', '') + ".html",
                         title=f"{h_type['name']} histogram of read lengths")

        hist, bin_edges = np.histogram(array,
                                       bins=n_bins,
                                       weights=h_type["weight"])

        fig = go.Figure()

        # Bars are positioned at the right edge of their bin.
        fig.add_trace(go.Bar(x=bin_edges[1:], y=hist, marker_color=color))

        if n50:
            fig.add_vline(n50)
            # NOTE(review): y=0.95 appears to be interpreted in data
            # coordinates unless yref is set - confirm whether
            # yref='paper' was intended.
            fig.add_annotation(text='N50', x=n50, y=0.95)
            fig.update_annotations(font_size=8)

        fig.update_layout(xaxis_title='Read length',
                          yaxis_title=h_type["ylabel"],
                          title=title or histogram.title,
                          title_x=0.5)

        histogram.fig = fig
        histogram.html = histogram.fig.to_html(full_html=False,
                                               include_plotlyjs='cdn')
        histogram.save(settings)

        log_histogram = Plot(
            path=path + h_type["name"].replace(" ", "_") +
            "LogTransformed_Histogram" + name.replace(' ', '') + ".html",
            title=h_type["name"] +
            " histogram of read lengths after log transformation")

        # Only the x-axis is log transformed. The weights stay the raw read
        # lengths so the weighted bars remain a number of bases; summing
        # log10(length) per bin would match neither y-axis label.
        hist_log, bin_edges_log = np.histogram(log_lengths,
                                               bins=n_bins,
                                               weights=h_type["weight"])

        fig = go.Figure()
        fig.add_trace(
            go.Bar(x=bin_edges_log[1:], y=hist_log, marker_color=color))

        # Tick positions are in log10 space, labels show the real lengths.
        fig.update_layout(xaxis=dict(tickmode='array',
                                     tickvals=np.log10(ticks),
                                     ticktext=ticks),
                          xaxis_title='Read length',
                          yaxis_title=h_type["ylabel"],
                          title=title or log_histogram.title,
                          title_x=0.5)

        if n50:
            fig.add_vline(np.log10(n50))
            fig.add_annotation(text='N50', x=np.log10(n50), y=0.95)
            fig.update_annotations(font_size=8)

        log_histogram.fig = fig
        log_histogram.html = log_histogram.fig.to_html(full_html=False,
                                                       include_plotlyjs='cdn')
        log_histogram.save(settings)

        plots.extend([histogram, log_histogram])

    plots.append(
        yield_by_minimal_length_plot(array=array,
                                     name=name,
                                     path=path,
                                     title=title,
                                     color=color,
                                     settings=settings))

    return plots
コード例 #23
0
def violin_or_box_plot(df,
                       y,
                       path,
                       y_name,
                       settings,
                       title=None,
                       plot="violin",
                       log=False):
    """Build a violin, box or ridge comparison plot from a DataFrame.

    One trace is added per unique value of the 'dataset' column; the values
    plotted come from column `y`. Violin and box plots are finalised by
    process_violin_and_box, ridge plots are laid out and saved here. Any
    other `plot` value is logged as an error and terminates the program.

    Returns a single-element list holding the Plot object.
    """
    comp = Plot(path=f"{path}NanoComp_{y.replace(' ', '_')}_{plot}.html",
                title=f"Comparing {y_name.lower()}")

    if plot == 'violin' or plot == 'box':
        is_violin = plot == 'violin'
        logging.info(f"NanoComp: Creating {plot} plot for {y}.")

        fig = go.Figure()

        trace_type = go.Violin if is_violin else go.Box
        # Only the violin variant hides the individual points.
        extra_kwargs = {"points": False} if is_violin else {}

        for dataset in df["dataset"].unique():
            selected = df["dataset"] == dataset
            trace = trace_type(x=df["dataset"][selected],
                               y=df[y][selected],
                               name=dataset,
                               **extra_kwargs)
            fig.add_trace(trace)

        process_violin_and_box(fig,
                               log=log,
                               plot_obj=comp,
                               title=title,
                               y_name=y_name,
                               ymax=np.amax(df[y]),
                               settings=settings)
        return [comp]

    if plot == 'ridge':
        logging.info(f"NanoComp: Creating ridges plot for {y}.")

        fig = go.Figure()

        for d in df["dataset"].unique():
            values = df[y][df['dataset'] == d]
            fig.add_trace(go.Violin(x=values, name=d))

        # Horizontal, one-sided violins give the ridge appearance.
        fig.update_traces(orientation='h',
                          side='positive',
                          width=3,
                          points=False)
        fig.update_layout(title=title or comp.title, title_x=0.5)

        comp.fig = fig
        comp.html = comp.fig.to_html(full_html=False, include_plotlyjs='cdn')
        comp.save(settings)
        return [comp]

    message = f"Unknown comp plot type {plot}"
    logging.error(message)
    sys.exit(message)