def main():
    """Organization function: run the whole NanoPlot workflow.

    Steps, in order:
    - get settings and arguments, prepare the output directory and the logs
    - get the input data, either from a pickle file (``args.pickle``) or via
      ``get_input`` using the first input source for which a value was given
    - optionally store the data as a pickle (``args.store``) and/or as a
      gzipped TSV (``args.raw``)
    - write statistics, filter/transform the data and, when a filter was
      applied, write post-filtering statistics as well
    - call the plotting function (once per barcode when ``args.barcoded``)
    - create the report

    Raises:
        Exception: any exception raised during the run is logged including
            its traceback, a bug-report hint is printed to stdout, and the
            exception is re-raised unchanged.
    """
    settings, args = utils.get_args()
    try:
        utils.make_output_dir(args.outdir)
        utils.init_logs(args)
        args.format = nanoplotter.check_valid_format(args.format)
        sources = {
            "fastq": args.fastq,
            "bam": args.bam,
            "cram": args.cram,
            "fastq_rich": args.fastq_rich,
            "fastq_minimal": args.fastq_minimal,
            "summary": args.summary,
            "fasta": args.fasta,
            "ubam": args.ubam,
        }
        if args.pickle:
            # NOTE(security): unpickling can execute arbitrary code, so only
            # pickle files from a trusted origin should be passed here.
            # The context manager guarantees the handle is closed again.
            with open(args.pickle, 'rb') as pickle_in:
                datadf = pickle.load(pickle_in)
        else:
            # The first source (in the order of `sources`) with a truthy value
            # determines both the source type and the files to read.
            datadf = get_input(
                source=[n for n, s in sources.items() if s][0],
                files=[f for f in sources.values() if f][0],
                threads=args.threads,
                readtype=args.readtype,
                combine="simple",
                barcoded=args.barcoded,
                huge=args.huge)
        if args.store:
            # Close (and thereby flush) the file deterministically instead of
            # leaving that to garbage collection.
            with open(settings["path"] + "NanoPlot-data.pickle", 'wb') as pickle_out:
                pickle.dump(obj=datadf, file=pickle_out)
        if args.raw:
            datadf.to_csv(settings["path"] + "NanoPlot-data.tsv.gz",
                          sep="\t",
                          index=False,
                          compression="gzip")

        settings["statsfile"] = [make_stats(datadf, settings, suffix="")]
        datadf, settings = filter_and_transform_data(datadf, settings)
        if settings["filtered"]:  # Bool set when filter was applied in filter_and_transform_data()
            settings["statsfile"].append(
                make_stats(datadf[datadf["length_filter"]], settings, suffix="_post_filtering"))

        if args.barcoded:
            barcodes = list(datadf["barcode"].unique())
            plots = []
            for barc in barcodes:
                logging.info("Processing {}".format(barc))
                # Every barcode gets its own output prefix and plot title
                settings["path"] = path.join(args.outdir, args.prefix + barc + "_")
                dfbarc = datadf[datadf["barcode"] == barc]
                if len(dfbarc) > 5:
                    settings["title"] = barc
                    plots.extend(make_plots(dfbarc, settings))
                else:
                    sys.stderr.write(
                        "Found barcode {} less than 5x, ignoring...\n".format(barc))
                    logging.info(
                        "Found barcode {} less than 5 times, ignoring".format(barc))
        else:
            plots = make_plots(datadf, settings)
        make_report(plots, settings)
        logging.info("Finished!")
    except Exception as e:
        # Top-level boundary: log with traceback, inform the user, re-raise.
        logging.error(e, exc_info=True)
        print("\n\n\nIf you read this then NanoPlot {} has crashed :-(".format(__version__))
        print("Please try updating NanoPlot and see if that helps...\n")
        print("If not, please report this issue at https://github.com/wdecoster/NanoPlot/issues")
        print("If you could include the log file that would be really helpful.")
        print("Thanks!\n\n\n")
        raise
def scatter(x, y, legacy, names, path, plots, color, colormap, settings,
            stat=None, log=False, minvalx=0, minvaly=0, title=None,
            xmax=None, ymax=None):
    """Create plotly scatter plots of x against y with marginal histograms.

    Depending on the flags in `plots`:
    - plots["dot"]: scatterplot with a histogram on both axes
    - plots["kde"]: kernel density plot with histograms on both axes
    Hexbin is not implemented and the pauvre plot is not available here.
    When any value in `legacy` equals 1, `scatter_legacy` is called as well
    and its plots are added to the result; for that call `settings` is
    replaced by the settings returned from `utils.get_args()`.

    At most 10000 randomly chosen reads (drawn without replacement from
    `x.index`) are plotted. `names` holds the axis labels (x first, y second),
    `path` is the output prefix to which the plot-specific suffix is appended,
    and `log` switches to the "_loglength" file names and power-of-ten x ticks.
    `xmax` / `ymax` override the axis maxima of the dot plot when truthy.
    `stat` is only passed on to `scatter_legacy`.

    Returns a list of the plots that were made; the list is empty when
    `contains_variance` rejects the data.
    """
    label_x, label_y = names[0], names[1]
    logging.info(
        f"NanoPlot: Creating {label_x} vs {label_y} plots using {x.size} reads."
    )
    if not contains_variance([x, y], names):
        return []

    idx = np.random.choice(x.index, min(10000, len(x)), replace=False)
    maxvalx = xmax or np.amax(x[idx])
    maxvaly = ymax or np.amax(y[idx])

    def _set_log_ticks(figure):
        # Label the log-transformed x axis with the untransformed powers of ten
        ticks = [10**i for i in range(10) if not 10**i > 10 * (10**maxvalx)]
        figure.update_layout(xaxis={
            "tickmode": 'array',
            "tickvals": np.log10(ticks),
            "ticktext": ticks,
            "tickangle": 45,
        })

    def _finalize(plot, figure):
        # Attach the figure, render the html snippet and write the plot
        plot.fig = figure
        plot.html = plot.fig.to_html(full_html=False, include_plotlyjs='cdn')
        plot.save(settings)
        return plot

    plots_made = []

    if plots["dot"]:
        dot_title = f"{label_x} vs {label_y} plot using dots"
        if log:
            dot_title = dot_title + " after log transformation of read lengths"
        dot_plot = Plot(path=path + ("_loglength_dot.html" if log else "_dot.html"),
                        title=dot_title)
        dot_fig = px.scatter(x=x[idx],
                             y=y[idx],
                             marginal_x="histogram",
                             marginal_y="histogram",
                             range_x=[minvalx, maxvalx],
                             range_y=[minvaly, maxvaly])
        dot_fig.update_traces(marker={"color": color})
        dot_fig.update_yaxes(rangemode="tozero")
        dot_fig.update_xaxes(rangemode="tozero")
        dot_fig.update_layout(xaxis_title=label_x,
                              yaxis_title=label_y,
                              title=title or dot_plot.title,
                              title_x=0.5)
        if log:
            _set_log_ticks(dot_fig)
        plots_made.append(_finalize(dot_plot, dot_fig))

    if plots["kde"]:
        kde_suffix = "_loglength_kde.html" if log else "_kde.html"
        kde_plot = Plot(path=path + kde_suffix,
                        title=f"{label_x} vs {label_y} kde plot")
        rgb = hex_to_rgb_scale_0_1(color)
        kde_fig = ff.create_2d_density(x[idx],
                                       y[idx],
                                       point_size=3,
                                       hist_color=rgb,
                                       point_color=rgb,
                                       colorscale=colormap)
        kde_fig.update_layout(xaxis_title=label_x,
                              yaxis_title=label_y,
                              title=title or kde_plot.title,
                              title_x=0.5,
                              xaxis={"tickangle": 45})
        if log:
            _set_log_ticks(kde_fig)
        plots_made.append(_finalize(kde_plot, kde_fig))

    if 1 in legacy.values():
        # The legacy plots use freshly parsed settings, not the ones passed in
        settings, _ = utils.get_args()
        plots_made.extend(
            scatter_legacy(x=x[idx],
                           y=y[idx],
                           names=names,
                           path=path,
                           plots=legacy,
                           color=color,
                           settings=settings,
                           stat=stat,
                           log=log,
                           minvalx=minvalx,
                           minvaly=minvaly,
                           title=title))
    return plots_made