Beispiel #1
0
def main():
    """
    Organization function: runs the complete workflow from arguments to report.

    - creates the output directory and sets up logging
    - gets the input data, either from a pickle file (args.pickle) or via
      get_input() from the first input source that was specified
    - optionally stores the data as pickle (args.store) and/or as a gzipped
      tab-separated file (args.raw)
    - calls make_stats() before and, if a filter was applied, after filtering
    - calls the plotting function (once per barcode if args.barcoded is set)
      and make_report()

    Any exception is logged including its traceback, a message asking the
    user to report the crash is printed, and the exception is re-raised.
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = nanoplotter.check_valid_format(args.format)
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "fastq_rich": args.fastq_rich,
            "fastq_minimal": args.fastq_minimal,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }

        if args.pickle:
            # NOTE(review): unpickling can execute arbitrary code, so this
            # should only ever be pointed at trusted pickle files.
            # The context manager makes sure the handle is closed again.
            with open(args.pickle, 'rb') as pickle_in:
                datadf = pickle.load(pickle_in)
        else:
            # Use the first source type (and its files) for which input was given
            datadf = get_input(source=[n for n, s in sources.items() if s][0],
                               files=[f for f in sources.values() if f][0],
                               threads=args.threads,
                               readtype=args.readtype,
                               combine="simple",
                               barcoded=args.barcoded,
                               huge=args.huge)
        if args.store:
            # Close the file explicitly so the pickle is completely flushed
            # to disk instead of relying on garbage collection of the handle.
            with open(settings["path"] + "NanoPlot-data.pickle",
                      'wb') as pickle_out:
                pickle.dump(obj=datadf, file=pickle_out)
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")

        settings["statsfile"] = [make_stats(datadf, settings, suffix="")]
        datadf, settings = filter_and_transform_data(datadf, settings)
        if settings[
                "filtered"]:  # Bool set when filter was applied in filter_and_transform_data()
            settings["statsfile"].append(
                make_stats(datadf[datadf["length_filter"]],
                           settings,
                           suffix="_post_filtering"))

        if args.barcoded:
            barcodes = list(datadf["barcode"].unique())
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                # Every barcode gets its own output prefix
                settings["path"] = path.join(args.outdir,
                                             args.prefix + barc + "_")
                dfbarc = datadf[datadf["barcode"] == barc]
                # NOTE(review): barcodes with exactly 5 reads are skipped too,
                # although the messages below say "less than 5".
                if len(dfbarc) > 5:
                    settings["title"] = barc
                    plots.extend(make_plots(dfbarc, settings))
                else:
                    sys.stderr.write(
                        "Found barcode {} less than 5x, ignoring...\n".format(
                            barc))
                    logging.info(
                        "Found barcode {} less than 5 times, ignoring".format(
                            barc))
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings)
        logging.info("Finished!")
    except Exception as e:
        # Top-level boundary: log with traceback, inform the user, re-raise
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot {} has crashed :-(".format(
            __version__))
        print("Please try updating NanoPlot and see if that helps...\n")
        print(
            "If not, please report this issue at https://github.com/wdecoster/NanoPlot/issues"
        )
        print(
            "If you could include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise
Beispiel #2
0
def scatter(x,
            y,
            legacy,
            names,
            path,
            plots,
            color,
            colormap,
            settings,
            stat=None,
            log=False,
            minvalx=0,
            minvaly=0,
            title=None,
            xmax=None,
            ymax=None):
    """
    Create marginalised scatter and KDE plots of x against y using plotly.

    Plotly based successor of the scatter_legacy function, which is still
    called when any value in `legacy` equals 1.
    - plots["dot"]: scatterplot with histograms on both axes
    - plots["kde"]: kernel density plot with histograms on both axes
    - hexbin not implemented yet
    - pauvre plot temporarily not available

    At most 10000 randomly chosen entries of x (and the matching entries
    of y) are plotted. With log=True the output files get a "_loglength"
    infix and the x axis ticks are labelled with powers of ten.

    Returns the list of plots that were made, which is empty when
    contains_variance() reports a falsy result for the data.
    """
    logging.info(
        f"NanoPlot: Creating {names[0]} vs {names[1]} plots using {x.size} reads."
    )
    if not contains_variance([x, y], names):
        return []

    created = []
    # Random subsample (without replacement) of the index labels of x
    sample = np.random.choice(x.index, min(10000, len(x)), replace=False)
    maxvalx = xmax if xmax else np.amax(x[sample])
    maxvaly = ymax if ymax else np.amax(y[sample])

    def label_log_ticks(figure):
        """Put ticks at log10 of the powers of ten, labelled with the power itself."""
        upper = 10 * (10**maxvalx)
        ticks = [10**exp for exp in range(10) if not 10**exp > upper]
        figure.update_layout(xaxis=dict(tickmode='array',
                                        tickvals=np.log10(ticks),
                                        ticktext=ticks,
                                        tickangle=45))

    if plots["dot"]:
        if log:
            dot_path = path + "_loglength_dot.html"
            dot_title = (f"{names[0]} vs {names[1]} plot using dots "
                         "after log transformation of read lengths")
        else:
            dot_path = path + "_dot.html"
            dot_title = f"{names[0]} vs {names[1]} plot using dots"
        dot_plot = Plot(path=dot_path, title=dot_title)

        dot_fig = px.scatter(x=x[sample],
                             y=y[sample],
                             marginal_x="histogram",
                             marginal_y="histogram",
                             range_x=[minvalx, maxvalx],
                             range_y=[minvaly, maxvaly])
        dot_fig.update_traces(marker=dict(color=color))
        dot_fig.update_yaxes(rangemode="tozero")
        dot_fig.update_xaxes(rangemode="tozero")
        dot_fig.update_layout(xaxis_title=names[0],
                              yaxis_title=names[1],
                              title=title if title else dot_plot.title,
                              title_x=0.5)
        if log:
            label_log_ticks(dot_fig)

        dot_plot.fig = dot_fig
        dot_plot.html = dot_plot.fig.to_html(full_html=False,
                                             include_plotlyjs='cdn')
        dot_plot.save(settings)
        created.append(dot_plot)

    if plots["kde"]:
        kde_suffix = "_loglength_kde.html" if log else "_kde.html"
        kde_plot = Plot(path=path + kde_suffix,
                        title=f"{names[0]} vs {names[1]} kde plot")

        # Points and marginal histograms share the colour derived from `color`
        rgb = hex_to_rgb_scale_0_1(color)
        kde_fig = ff.create_2d_density(x[sample],
                                       y[sample],
                                       point_size=3,
                                       hist_color=rgb,
                                       point_color=rgb,
                                       colorscale=colormap)
        kde_fig.update_layout(xaxis_title=names[0],
                              yaxis_title=names[1],
                              title=title if title else kde_plot.title,
                              title_x=0.5,
                              xaxis=dict(tickangle=45))
        if log:
            label_log_ticks(kde_fig)

        kde_plot.fig = kde_fig
        kde_plot.html = kde_plot.fig.to_html(full_html=False,
                                             include_plotlyjs='cdn')
        kde_plot.save(settings)
        created.append(kde_plot)

    if 1 not in legacy.values():
        return created

    # NOTE(review): the settings passed in by the caller are replaced here by
    # a fresh utils.get_args() result before calling scatter_legacy - confirm
    # that this is intended.
    settings, _ = utils.get_args()
    created.extend(
        scatter_legacy(x=x[sample],
                       y=y[sample],
                       names=names,
                       path=path,
                       plots=legacy,
                       color=color,
                       settings=settings,
                       stat=stat,
                       log=log,
                       minvalx=minvalx,
                       minvaly=minvaly,
                       title=title))
    return created