Beispiel #1
0
def make_plots(datadf, settings):
    '''
    Call plotting functions from nanoplotter and collect what they return.

    A plot is only attempted when the columns it needs are present in datadf.

    datadf: DataFrame holding the read data. The "lengths" column is always
        read; "quals", "channelIDs", "start_time", "aligned_lengths", "mapQ",
        "percentIdentity" and "aligned_quals" are optional and each enables
        the plots that depend on it.
    settings: mapping of options. Keys read here are "color", "plots",
        "no_N50", "path", "format" and "title", plus "lengths_pointer",
        "length_prefix" and "logBool" for the length based scatter plots.
        settings["lengths_pointer"] is a column in the DataFrame specifying
        which lengths to use.

    Returns the list built by extending with the result of every
    nanoplotter call that was made.
    '''
    color = nanoplotter.check_valid_color(settings["color"])
    # Number of times each plot kind was requested in settings["plots"].
    plotdict = {
        kind: settings["plots"].count(kind)
        for kind in ["kde", "hex", "dot", 'pauvre']
    }
    plots = []
    if settings["no_N50"]:
        n50 = None
    else:
        n50 = nanomath.get_N50(np.sort(datadf["lengths"]))
    path = settings["path"]
    figformat = settings["format"]
    title = settings["title"]
    # Keyword arguments passed unchanged to every scatter call below.
    scatter_kwargs = dict(color=color,
                          figformat=figformat,
                          plots=plotdict,
                          title=title)
    plots.extend(
        nanoplotter.length_plots(array=datadf["lengths"],
                                 name="Read length",
                                 path=path,
                                 n50=n50,
                                 color=color,
                                 figformat=figformat,
                                 title=title))
    logging.info("Created length plots")
    if "quals" in datadf:
        plots.extend(
            nanoplotter.scatter(x=datadf[settings["lengths_pointer"]],
                                y=datadf["quals"],
                                names=['Read lengths', 'Average read quality'],
                                path=path + settings["length_prefix"] +
                                "LengthvsQualityScatterPlot",
                                log=settings["logBool"],
                                **scatter_kwargs))
        logging.info("Created LengthvsQual plot")
    if "channelIDs" in datadf:
        plots.extend(
            nanoplotter.spatial_heatmap(array=datadf["channelIDs"],
                                        title=title,
                                        path=path +
                                        "ActivityMap_ReadsPerChannel",
                                        color="Greens",
                                        figformat=figformat))
        logging.info("Created spatialheatmap for succesfull basecalls.")
    if "start_time" in datadf:
        plots.extend(
            nanoplotter.time_plots(df=datadf,
                                   path=path,
                                   color=color,
                                   figformat=figformat,
                                   title=title))
        logging.info("Created timeplots.")
    if "aligned_lengths" in datadf and "lengths" in datadf:
        plots.extend(
            nanoplotter.scatter(
                x=datadf["aligned_lengths"],
                y=datadf["lengths"],
                names=["Aligned read lengths", "Sequenced read length"],
                path=path + "AlignedReadlengthvsSequencedReadLength",
                **scatter_kwargs))
        logging.info("Created AlignedLength vs Length plot.")
    # The column is named "mapQ"; the previous check for "maqpQ" never
    # matched, so the mapping quality plots were silently skipped.
    if "mapQ" in datadf:
        # Only this first plot needs the basecall qualities.
        if "quals" in datadf:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf["mapQ"],
                    y=datadf["quals"],
                    names=["Read mapping quality",
                           "Average basecall quality"],
                    path=path + "MappingQualityvsAverageBaseQuality",
                    **scatter_kwargs))
            logging.info("Created MapQvsBaseQ plot.")
        plots.extend(
            nanoplotter.scatter(x=datadf[settings["lengths_pointer"]],
                                y=datadf["mapQ"],
                                names=["Read length", "Read mapping quality"],
                                path=path + settings["length_prefix"] +
                                "MappingQualityvsReadLength",
                                log=settings["logBool"],
                                **scatter_kwargs))
        logging.info("Created Mapping quality vs read length plot.")
    if "percentIdentity" in datadf:
        # The 1st percentile is handed to scatter as minvalx / minvaly.
        minPID = np.percentile(datadf["percentIdentity"], 1)
        # Skip the quality comparison instead of raising a KeyError when the
        # aligned qualities are not available.
        if "aligned_quals" in datadf:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf["percentIdentity"],
                    y=datadf["aligned_quals"],
                    names=["Percent identity", "Read quality"],
                    path=path + "PercentIdentityvsAverageBaseQuality",
                    stat=stats.pearsonr,
                    minvalx=minPID,
                    **scatter_kwargs))
            logging.info("Created Percent ID vs Base quality plot.")
        plots.extend(
            nanoplotter.scatter(
                x=datadf[settings["lengths_pointer"]],
                y=datadf["percentIdentity"],
                names=["Aligned read length", "Percent identity"],
                path=path + "PercentIdentityvsAlignedReadLength",
                stat=stats.pearsonr,
                log=settings["logBool"],
                minvaly=minPID,
                **scatter_kwargs))
        logging.info("Created Percent ID vs Length plot")
    return plots
Beispiel #2
0
def make_plots(datadf, settings):
    '''
    Call plotting functions from nanoplotter and collect what they return.

    A plot is only attempted when the columns it needs are present in datadf.
    When settings["logBool"] is true, the length based plots are made a
    second time with the log option of the plotting function switched on.

    datadf: DataFrame holding the read data. The "lengths" and
        "length_filter" columns are always read; "length_filter" is used as
        a mask to select the rows for the length based plots (presumably a
        boolean column - confirm against the caller). "quals", "channelIDs",
        "start_time", "aligned_lengths", "mapQ", "percentIdentity" and
        "aligned_quals" are optional.
    settings: mapping of options. Keys read here are "font_scale", "dpi",
        "color", "colormap", "plots", "N50", "path", "format", "title" and
        "logBool", plus "lengths_pointer" and "hide_stats" for the plots
        that need them.
        settings["lengths_pointer"] is a column in the DataFrame specifying
        which lengths to use; a 'log_' part in that name is removed for the
        non-log version of a plot.

    Returns the list holding the results of every nanoplotter call made.
    '''
    plot_settings = dict(font_scale=settings["font_scale"])
    nanoplotter.plot_settings(plot_settings, dpi=settings["dpi"])
    color = nanoplotter.check_valid_color(settings["color"])
    colormap = nanoplotter.check_valid_colormap(settings["colormap"])
    # Number of times each plot kind was requested in settings["plots"].
    plotdict = {
        kind: settings["plots"].count(kind)
        for kind in ["kde", "hex", "dot", 'pauvre']
    }
    plots = []
    if settings["N50"]:
        n50 = nanomath.get_N50(np.sort(datadf["lengths"]))
    else:
        n50 = None
    path = settings["path"]
    figformat = settings["format"]
    title = settings["title"]
    # Apply the length filter once; every length based plot below reads its
    # columns from this single selection instead of re-filtering datadf.
    filtered = datadf[datadf["length_filter"]]
    # Keyword arguments passed unchanged to every scatter call below.
    scatter_kwargs = dict(color=color,
                          figformat=figformat,
                          plots=plotdict,
                          title=title,
                          plot_settings=plot_settings)
    plots.extend(
        nanoplotter.length_plots(array=filtered["lengths"].astype('uint64'),
                                 name="Read length",
                                 path=path,
                                 n50=n50,
                                 color=color,
                                 figformat=figformat,
                                 title=title))
    logging.info("Created length plots")
    if "quals" in datadf:
        lengths_column = settings["lengths_pointer"]
        plots.extend(
            nanoplotter.scatter(
                x=filtered[lengths_column.replace('log_', '')],
                y=filtered["quals"],
                names=['Read lengths', 'Average read quality'],
                path=path + "LengthvsQualityScatterPlot",
                **scatter_kwargs))
        if settings["logBool"]:
            plots.extend(
                nanoplotter.scatter(
                    x=filtered[lengths_column],
                    y=filtered["quals"],
                    names=['Read lengths', 'Average read quality'],
                    path=path + "LengthvsQualityScatterPlot",
                    log=True,
                    **scatter_kwargs))
        logging.info("Created LengthvsQual plot")
    if "channelIDs" in datadf:
        plots.extend(
            nanoplotter.spatial_heatmap(array=datadf["channelIDs"],
                                        title=title,
                                        path=path +
                                        "ActivityMap_ReadsPerChannel",
                                        color=colormap,
                                        figformat=figformat))
        logging.info("Created spatialheatmap for succesfull basecalls.")
    if "start_time" in datadf:
        plots.extend(
            nanoplotter.time_plots(df=datadf,
                                   path=path,
                                   color=color,
                                   figformat=figformat,
                                   title=title,
                                   plot_settings=plot_settings))
        if settings["logBool"]:
            plots.extend(
                nanoplotter.time_plots(df=datadf,
                                       path=path,
                                       color=color,
                                       figformat=figformat,
                                       title=title,
                                       log_length=True,
                                       plot_settings=plot_settings))
        logging.info("Created timeplots.")
    if "aligned_lengths" in datadf and "lengths" in datadf:
        plots.extend(
            nanoplotter.scatter(
                x=filtered["aligned_lengths"],
                y=filtered["lengths"],
                names=["Aligned read lengths", "Sequenced read length"],
                path=path + "AlignedReadlengthvsSequencedReadLength",
                **scatter_kwargs))
        logging.info("Created AlignedLength vs Length plot.")
    if "mapQ" in datadf and "quals" in datadf:
        lengths_column = settings["lengths_pointer"]
        # This plot is not length based, so it uses the unfiltered data.
        plots.extend(
            nanoplotter.scatter(
                x=datadf["mapQ"],
                y=datadf["quals"],
                names=["Read mapping quality", "Average basecall quality"],
                path=path + "MappingQualityvsAverageBaseQuality",
                **scatter_kwargs))
        logging.info("Created MapQvsBaseQ plot.")
        plots.extend(
            nanoplotter.scatter(
                x=filtered[lengths_column.replace('log_', '')],
                y=filtered["mapQ"],
                names=["Read length", "Read mapping quality"],
                path=path + "MappingQualityvsReadLength",
                **scatter_kwargs))
        if settings["logBool"]:
            plots.extend(
                nanoplotter.scatter(
                    x=filtered[lengths_column],
                    y=filtered["mapQ"],
                    names=["Read length", "Read mapping quality"],
                    path=path + "MappingQualityvsReadLength",
                    log=True,
                    **scatter_kwargs))
        logging.info("Created Mapping quality vs read length plot.")
    if "percentIdentity" in datadf:
        lengths_column = settings["lengths_pointer"]
        # The 1st percentile is handed to scatter as minvalx / minvaly.
        minPID = np.percentile(datadf["percentIdentity"], 1)
        # No statistic is passed on when the user asked to hide stats.
        stat = stats.pearsonr if not settings["hide_stats"] else None
        if "aligned_quals" in datadf:
            plots.extend(
                nanoplotter.scatter(
                    x=datadf["percentIdentity"],
                    y=datadf["aligned_quals"],
                    names=["Percent identity", "Average Base Quality"],
                    path=path + "PercentIdentityvsAverageBaseQuality",
                    stat=stat,
                    minvalx=minPID,
                    **scatter_kwargs))
            logging.info("Created Percent ID vs Base quality plot.")
        plots.extend(
            nanoplotter.scatter(
                x=filtered[lengths_column.replace('log_', '')],
                y=filtered["percentIdentity"],
                names=["Aligned read length", "Percent identity"],
                path=path + "PercentIdentityvsAlignedReadLength",
                stat=stat,
                minvaly=minPID,
                **scatter_kwargs))
        if settings["logBool"]:
            plots.extend(
                nanoplotter.scatter(
                    x=filtered[lengths_column],
                    y=filtered["percentIdentity"],
                    names=["Aligned read length", "Percent identity"],
                    path=path + "PercentIdentityvsAlignedReadLength",
                    stat=stat,
                    log=True,
                    minvaly=minPID,
                    **scatter_kwargs))

        # The histogram result is appended as a single item, unlike the
        # results above which are merged into the list with extend.
        plots.append(
            nanoplotter.dynamic_histogram(array=datadf["percentIdentity"],
                                          name="percent identity",
                                          path=path +
                                          "PercentIdentityHistogram",
                                          title=title,
                                          color=color))
        logging.info("Created Percent ID vs Length plot")
    return plots
Beispiel #3
0
def makePlots(datadf, lengthprefix, logBool, readlengthsPointer, args):
    '''
    Call plotting functions from nanoplotter; nothing is returned.

    datadf: DataFrame holding the read data. "lengths", "quals" and the
        column named by readlengthsPointer are always read. "channelIDs" is
        read when args.fastq_rich or args.summary is set. "aligned_lengths",
        "mapQ", "percentIdentity" and "aligned_quals" are read when
        args.bam is set.
    lengthprefix: string inserted into the output file name of the length
        based plots.
    logBool: passed as the log option to the length based plots.
    readlengthsPointer: name of the column in datadf specifying which
        lengths to use.
    args: object with the attributes color, plots, outdir, prefix, format,
        fastq_rich, summary and bam.
    '''
    def outpath(name):
        '''Return the output path for name inside args.outdir.'''
        return os.path.join(args.outdir, args.prefix + name)

    color = nanoplotter.checkvalidColor(args.color)
    # Number of times each plot kind was requested in args.plots.
    plotdict = {kind: args.plots.count(kind) for kind in ["kde", "hex", "dot"]}
    nanoplotter.lengthPlots(
        array=datadf[readlengthsPointer],
        name="Read length",
        path=outpath(lengthprefix),
        n50=nanomath.getN50(np.sort(datadf["lengths"])),
        color=color,
        format=args.format,
        log=logBool)
    logging.info("Created length plots")
    nanoplotter.scatter(
        x=datadf[readlengthsPointer],
        y=datadf["quals"],
        names=['Read lengths', 'Average read quality'],
        path=outpath(lengthprefix + "LengthvsQualityScatterPlot"),
        color=color,
        format=args.format,
        plots=plotdict,
        log=logBool)
    logging.info("Created LengthvsQual plot")
    if args.fastq_rich or args.summary:
        nanoplotter.spatialHeatmap(
            array=datadf["channelIDs"],
            title="Number of reads generated per channel",
            path=outpath("ActivityMap_ReadsPerChannel"),
            color="Greens",
            format=args.format)
        logging.info("Created spatialheatmap for succesfull basecalls.")
        nanoplotter.timePlots(
            df=datadf,
            path=outpath(""),
            color=color,
            format=args.format)
        logging.info("Created timeplots.")
    if args.bam:
        nanoplotter.scatter(
            x=datadf["aligned_lengths"],
            y=datadf["lengths"],
            names=["Aligned read lengths", "Sequenced read length"],
            path=outpath("AlignedReadlengthvsSequencedReadLength"),
            format=args.format,
            plots=plotdict,
            color=color)
        logging.info("Created AlignedLength vs Length plot.")
        nanoplotter.scatter(
            x=datadf["mapQ"],
            y=datadf["quals"],
            names=["Read mapping quality", "Average basecall quality"],
            path=outpath("MappingQualityvsAverageBaseQuality"),
            color=color,
            format=args.format,
            plots=plotdict)
        logging.info("Created MapQvsBaseQ plot.")
        nanoplotter.scatter(
            x=datadf[readlengthsPointer],
            y=datadf["mapQ"],
            names=["Read length", "Read mapping quality"],
            path=outpath(lengthprefix + "MappingQualityvsReadLength"),
            color=color,
            format=args.format,
            plots=plotdict,
            log=logBool)
        # This used to repeat the MapQvsBaseQ message of the previous plot.
        logging.info("Created Mapping quality vs read length plot.")
        # The smallest percent identity is handed to scatter as
        # minvalx / minvaly.
        minPID = np.amin(datadf["percentIdentity"])
        nanoplotter.scatter(
            x=datadf["percentIdentity"],
            y=datadf["aligned_quals"],
            names=["Percent identity", "Read quality"],
            path=outpath("PercentIdentityvsAverageBaseQuality"),
            color=color,
            format=args.format,
            plots=plotdict,
            stat=stats.pearsonr,
            minvalx=minPID)
        logging.info("Created Percent ID Base quality plot.")
        nanoplotter.scatter(
            x=datadf[readlengthsPointer],
            y=datadf["percentIdentity"],
            names=["Aligned read length", "Percent identity"],
            path=outpath("PercentIdentityvsAlignedReadLength"),
            color=color,
            format=args.format,
            plots=plotdict,
            stat=stats.pearsonr,
            log=logBool,
            minvaly=minPID)
        logging.info("Created Percent ID vs Length plot")