Esempio n. 1
0
def main(project_path, seq_summary):
    """
    Generate the pycoQC quality-control plots for a nanopore run.

    Every figure is written as its own HTML file inside a ``QC_plots``
    folder, which is created under the project folder when missing.

    :param project_path: (str) path to the project folder (output files will be written here)
    :param seq_summary: (str) path and name of the sequencing_summary.txt file
    :return: None
    """
    # (pycoQC method producing the figure, stem of the HTML file receiving it)
    figure_specs = (
        ("summary", "fig_summary"),
        ("reads_len_1D", "fig_read_length"),
        ("reads_qual_1D", "fig_read_qual"),
        ("reads_len_qual_2D", "fig_len_qual"),
        ("output_over_time", "fig_output_over_time"),
        ("len_over_time", "fig_len_over_time"),
        ("qual_over_time", "fig_qual_over_time"),
        ("channels_activity", "fig_channels_activity"),
    )

    # Resolve both inputs to absolute paths before anything is created
    project_dir = pathlib.Path(project_path).absolute()
    summary_path = str(pathlib.Path(seq_summary).absolute())

    out_dir = project_dir / "QC_plots"
    out_dir.mkdir(mode=0o777, parents=True, exist_ok=True)

    qc = pycoQC(summary_path)

    # Build and save the figures one at a time, in the order listed above
    for method_name, file_stem in figure_specs:
        figure = getattr(qc, method_name)()
        target = str(out_dir / (file_stem + ".html"))
        plot(figure, filename=target, show_link=False)

    print("Plotting QC stats complete")
Esempio n. 2
0
from pycoQC.pycoQC import pycoQC
from pycoQC.pycoQC_plot import pycoQC_plot

# Import helper functions from pycoQC
from pycoQC.common import jhelp

# Import and setup plotly for offline plotting in Jupyter
from plotly.offline import plot, iplot, init_notebook_mode

# Switch plotly into notebook mode before any figure is displayed.
init_notebook_mode(connected=False)

# ## Init pycoQC

# In[13]:

# NOTE(review): `summary_file` and `html_outfile` are not defined in the
# visible part of this notebook export; presumably they are set in an
# earlier cell -- confirm before running this as a standalone script.
p = pycoQC(summary_file, html_outfile=html_outfile)

# In[14]:

# Bare expression: in a notebook this echoes the value as the cell output;
# when executed as a plain script it has no effect.
html_outfile

# In[15]:

# Build the figure returned by `summary()` and display it with iplot.
fig = p.summary()
iplot(fig, show_link=False)

# In[16]:

# Same pattern for `reads_len_1D()`; note that `fig` is rebound here.
fig = p.reads_len_1D()
iplot(fig, show_link=False)
Esempio n. 3
0
def generate_report(summary_file,
                    barcode_file,
                    outfile,
                    qual=7,
                    filter_calibration=False,
                    config=None,
                    template_file=None,
                    verbose_level=1,
                    title=None):
    """
    Run pycoQC and write the HTML report.

    The configuration returned by ``parse_config_file(config)`` maps plotting
    method names to the keyword arguments passed to each of them. Every
    supported method is called on a ``pycoQC`` instance, the resulting figure
    is converted to an HTML ``div`` and all divs are rendered through the
    Jinja template returned by ``get_jinja_template(template_file)``.

    :param summary_file: forwarded to pycoQC as ``seq_summary_file``
    :param barcode_file: forwarded to pycoQC as ``barcode_summary_file``
    :param outfile: path of the HTML file to write
    :param qual: forwarded to pycoQC as ``min_pass_qual``
    :param filter_calibration: forwarded to pycoQC as ``filter_calibration``
    :param config: passed to ``parse_config_file``
    :param template_file: passed to ``get_jinja_template``
    :param verbose_level: forwarded to pycoQC as ``verbose_level``
    :param title: optional text placed before the generation date in the
        report title
    :return: None
    """

    # Parse configuration file
    logger.warning("PARSE CONFIGURATION FILE")
    config_dict = parse_config_file(config)
    logger.debug(config_dict)

    # Initiate pycoQC
    logger.warning("PARSE DATA FILES")
    p = pycoQC(seq_summary_file=summary_file,
               barcode_summary_file=barcode_file,
               verbose_level=verbose_level,
               min_pass_qual=qual,
               filter_calibration=filter_calibration)

    # Loop over configuration file and run the pycoQC functions defined
    logger.warning("GENERATES PLOTS")
    plots = []
    titles = []
    for method_name, method_args in config_dict.items():

        # Skip entries that are not known plotting methods instead of
        # falling through to getattr(), whose AttributeError would not be
        # caught by the pycoQCError handler below.
        if method_name not in plot_methods:
            logger.info(
                "\tWarning: Method {} is defined in configuration but not supported"
                .format(method_name))
            continue

        try:
            logger.info("\tRunning method {}".format(method_name))
            logger.debug("\t{} ({})".format(method_name, method_args))

            # Work on a copy so the parsed configuration is left untouched.
            # The title is shown by the HTML template, so it is blanked in
            # the arguments handed to plotly; a missing title falls back to
            # the method name rather than raising KeyError.
            plot_kwargs = dict(method_args)
            plot_title = plot_kwargs.get("plot_title", method_name)
            plot_kwargs["plot_title"] = ""

            # Get method and generate plot
            method = getattr(p, method_name)
            fig = method(**plot_kwargs)
            plot_div = py.plot(fig,
                               output_type='div',
                               include_plotlyjs=False,
                               image_width='',
                               image_height='',
                               show_link=False,
                               auto_open=False)

            plots.append(plot_div)
            titles.append(plot_title)

        except pycoQCError as E:
            # A plot that cannot be produced is reported and skipped
            logger.info("\t\t{}".format(E))

    logger.warning("WRITE HTML REPORT")

    # Load HTML template for Jinja
    logger.info("\tLoad HTML template")
    template = get_jinja_template(template_file)

    # Set a title for the HTML report
    report_title = ""
    if title:
        report_title += title + "<br>"
    report_title += "generated on " + datetime.datetime.now().strftime(
        "%d/%m/%y")

    # Render plots
    logger.info("\tRender plots with Jinja2")
    rendering = template.render(plots=plots,
                                titles=titles,
                                plotlyjs=py.get_plotlyjs(),
                                report_title=report_title)

    # Write to HTML file; the encoding is explicit so the output does not
    # depend on the platform default when the rendering holds non-ASCII text
    logger.info("\tWrite to HTML file")
    with open(outfile, "w", encoding="utf-8") as f:
        f.write(rendering)
Esempio n. 4
0
def main_pycoQC(args=None):
    """
    Command line entry point: parse the options and run pycoQC.

    :param args: list of command line tokens to parse. When None,
        ``sys.argv[1:]`` is used.
    :return: None. The process exits after printing the default
        configuration (status 0), and exits with status 1 when
        ``--summary_file`` is missing or when neither ``--html_outfile``
        nor ``--json_outfile`` is given.
    """
    if args is None:
        args = sys.argv[1:]

    # Define parser object
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
            pycoQC computes metrics and generates interactive QC plots from the sequencing summary report generated by Oxford Nanopore technologies basecallers\n
            * Minimal usage
                pycoQC -f sequencing_summary.txt -o pycoQC_output.html
            * Including Guppy barcoding file + html output + json output
                pycoQC -f sequencing_summary.txt -b barcoding_sequencing.txt -o pycoQC_output.html -j pycoQC_output.json
            * Including Bam file + html output
                pycoQC -f sequencing_summary.txt -a alignment.bam -o pycoQC_output.html"""
                                    ))
    parser.add_argument('--version',
                        action='version',
                        version="{} v{}".format(package_name, package_version))

    # Define arguments
    parser_io = parser.add_argument_group('Input/output options')
    parser_io.add_argument(
        "--summary_file",
        "-f",
        default=[],
        nargs='*',
        help=textwrap.dedent(
            """Path to a sequencing_summary generated by Albacore 1.0.0 + (read_fast5_basecaller.py) / Guppy 2.1.3+ (guppy_basecaller).
            One can also pass multiple space separated file paths or a UNIX style regex matching multiple files (Required)"""
        ))
    parser_io.add_argument(
        "--barcode_file",
        "-b",
        default=[],
        nargs='*',
        help=textwrap.dedent(
            """Path to the barcode_file generated by Guppy 2.1.3+ (guppy_barcoder) or Deepbinner 0.2.0+. This is not a required file.
        One can also pass multiple space separated file paths or a UNIX style regex matching multiple files (optional)"""
        ))
    parser_io.add_argument(
        "--bam_file",
        "-a",
        default=[],
        nargs='*',
        help=textwrap.dedent(
            """Path to a Bam file corresponding to reads in the summary_file. Preferably aligned with Minimap2
          One can also pass multiple space separated file paths or a UNIX style regex matching multiple files (optional)"""
        ))
    parser_io.add_argument(
        "--html_outfile",
        "-o",
        default="",
        type=str,
        help=
        "Path to an output html file report (required if json_outfile not given)"
    )
    parser_io.add_argument(
        "--json_outfile",
        "-j",
        default="",
        type=str,
        help=
        "Path to an output json file report (required if html_outfile not given)"
    )
    parser_filt = parser.add_argument_group('Filtering options')
    parser_filt.add_argument(
        "--min_pass_qual",
        default=7,
        type=float,
        help=
        "Minimum quality to consider a read as 'pass' (default: %(default)s)")
    parser_filt.add_argument(
        "--min_pass_len",
        default=0,
        type=int,
        help=
        "Minimum read length to consider a read as 'pass' (default: %(default)s)"
    )
    parser_filt.add_argument(
        "--filter_calibration",
        default=False,
        action='store_true',
        help=
        "If given, reads flagged as calibration strand by the basecaller are removed (default: %(default)s)"
    )
    parser_filt.add_argument(
        "--filter_duplicated",
        default=False,
        action='store_true',
        help=
        "If given, duplicated read_ids are removed but the first occurence is kept (Guppy sometimes outputs the same read multiple times) (default: %(default)s)"
    )
    parser_filt.add_argument(
        "--min_barcode_percent",
        default=0.1,
        type=float,
        help=
        "Minimal percent of total reads to retain barcode label. If below, the barcode value is set as `unclassified` (default: %(default)s)"
    )
    parser_html = parser.add_argument_group('HTML report options')
    parser_html.add_argument(
        "--report_title",
        default="PycoQC report",
        type=str,
        help="Title to use in the html report (default: %(default)s)")
    parser_html.add_argument(
        "--template_file",
        type=str,
        default="",
        help="Jinja2 html template for the html report (default: %(default)s)")
    parser_html.add_argument(
        "--config_file",
        type=str,
        default="",
        help=textwrap.dedent(
            """Path to a JSON configuration file for the html report.
            If not provided, looks for it in ~/.pycoQC and ~/.config/pycoQC/config. If it's still not found, falls back to default parameters.
            The first level keys are the names of the plots to be included.
            The second level keys are the parameters to pass to each plotting function (default: %(default)s)"""
        ))
    parser_other = parser.add_argument_group('Other options')
    parser_other.add_argument(
        "--sample",
        default=100000,
        type=int,
        help=
        "If not None a n number of reads will be randomly selected instead of the entire dataset for ploting function (deterministic sampling) (default: %(default)s)"
    )
    parser_other.add_argument(
        "--default_config",
        "-d",
        action='store_true',
        help=
        "Print default configuration file. Can be used to generate a template JSON file (default: %(default)s)"
    )
    parser_verbosity = parser.add_mutually_exclusive_group()
    parser_verbosity.add_argument("-v",
                                  "--verbose",
                                  action="store_true",
                                  default=False,
                                  help="Increase verbosity")
    parser_verbosity.add_argument("-q",
                                  "--quiet",
                                  action="store_true",
                                  default=False,
                                  help="Reduce verbosity")

    # Parse the tokens resolved above, so an explicit `args` list is honoured
    # instead of always re-reading sys.argv
    args = parser.parse_args(args)

    # Set logging level
    logger = get_logger(name=__name__, verbose=args.verbose, quiet=args.quiet)

    # Print the default config parameters and exit
    if args.default_config:
        config_file = resource_filename("pycoQC",
                                        "templates/pycoQC_config.json")
        with open(config_file) as fp:
            sys.stdout.write(fp.read())
        sys.exit()

    # Usage errors exit with a non-zero status so that shells and pipelines
    # can detect the failure
    if not args.summary_file:
        logger.warning("ERROR: `--summary_file` is a required argument")
        parser.print_help()
        sys.exit(1)

    if not args.html_outfile and not args.json_outfile:
        logger.warning(
            "ERROR: At least one output file required `--html_outfile` or `--json_outfile`"
        )
        parser.print_help()
        sys.exit(1)

    # Run pycoQC
    pycoQC(summary_file=args.summary_file,
           barcode_file=args.barcode_file,
           bam_file=args.bam_file,
           filter_calibration=args.filter_calibration,
           filter_duplicated=args.filter_duplicated,
           min_barcode_percent=args.min_barcode_percent,
           min_pass_qual=args.min_pass_qual,
           min_pass_len=args.min_pass_len,
           sample=args.sample,
           html_outfile=args.html_outfile,
           report_title=args.report_title,
           config_file=args.config_file,
           template_file=args.template_file,
           json_outfile=args.json_outfile,
           verbose=args.verbose,
           quiet=args.quiet)
Esempio n. 5
0
def main_pycoQC(args=None):
    """
    Command line entry point: parse the options and produce the reports.

    Depending on the options, this prints the default configuration, lists
    the available plotting methods, or runs pycoQC and writes the HTML
    and/or JSON report. When ``--summary_file`` or both output files are
    missing, an error is logged and the help is printed.

    :param args: list of command line tokens to parse. When None,
        ``sys.argv[1:]`` is used.
    :return: None
    """
    if args is None:
        args = sys.argv[1:]

    # Define parser object
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
            pycoQC computes metrics and generates interactive QC plots from the sequencing summary report generated by Oxford Nanopore technologies basecallers\n
            * Minimal usage
                pycoQC -f sequencing_summary.txt -o pycoQC_output.html
            * Including Guppy barcoding file and json output
                pycoQC -f sequencing_summary.txt -b barcoding_sequencing.txt -o pycoQC_output.html -j pycoQC_output.json"""
                                    ))

    parser.add_argument('--version',
                        action='version',
                        version="{} v{}".format(package_name, package_version))

    # Define arguments
    parser_io = parser.add_argument_group('Input/output options')
    parser_io.add_argument(
        "--summary_file",
        "-f",
        default=None,
        type=str,
        nargs='+',
        help=textwrap.dedent(
            """Path to the sequencing_summary generated by Albacore 1.0.0 + (read_fast5_basecaller.py) / Guppy 2.1.3+ (guppy_basecaller).
        One can also pass multiple space separated file paths or a UNIX style regex matching multiple files (Required)"""
        ))
    parser_io.add_argument(
        "--barcode_file",
        "-b",
        default=None,
        type=str,
        nargs='+',
        help=textwrap.dedent(
            """Path to the barcode_summary_file generated by Guppy 2.1.3+ (guppy_barcoder)..
        One can also pass multiple space separated file paths or a UNIX style regex matching multiple files (optional)"""
        ))
    parser_io.add_argument(
        "--html_outfile",
        "-o",
        default=None,
        type=str,
        help="Path to an output html file (required if json_outfile not given)"
    )
    parser_io.add_argument(
        "--json_outfile",
        "-j",
        default=None,
        type=str,
        help="Path to an output json file (required if html_outfile not given)"
    )
    parser_filt = parser.add_argument_group('Filtering options')
    parser_filt.add_argument(
        "--min_pass_qual",
        "-q",
        default=7,
        type=int,
        help=
        "Minimum quality to consider a read as 'pass' (default: %(default)s)")
    parser_filt.add_argument(
        "--filter_calibration",
        default=False,
        action='store_true',
        help=
        "If given reads flagged as calibration strand by the basecaller are removed (default: %(default)s)"
    )
    parser_filt.add_argument(
        "--min_barcode_percent",
        default=0.1,
        type=float,
        help=
        "Minimal percent of total reads to retain barcode label. If below the barcode value is set as `unclassified` (default: %(default)s)"
    )
    parser_html = parser.add_argument_group('HTML report options')
    parser_html.add_argument(
        "--title",
        "-t",
        default=None,
        type=str,
        help="A title to be used in the html report (default: %(default)s)")
    parser_html.add_argument(
        "--template_file",
        type=str,
        default=None,
        help="Jinja2 html template for the html report (default: %(default)s)")
    parser_html.add_argument(
        "--config",
        "-c",
        type=str,
        default=None,
        help=textwrap.dedent(
            """Path to a JSON configuration file for the html report.
        If not provided, looks for it in ~/.pycoQC and ~/.config/pycoQC/config. If it's still not found, falls back to default parameters.
        The first level keys are the names of the plots to be included.
        The second level keys are the parameters to pass to each plotting function (default: %(default)s)"""
        ))
    parser_other = parser.add_argument_group('Other options')
    parser_other.add_argument(
        "--default_config",
        "-d",
        action='store_true',
        help=
        "Print default configuration file. Can be used to generate a template JSON file (default: %(default)s)"
    )
    parser_other.add_argument(
        "--list_plots",
        "-l",
        default=None,
        action='store_true',
        help=
        "Print the list of available plotting functions and exit (default: %(default)s)"
    )
    parser_other.add_argument(
        "--verbose_level",
        "-v",
        choices=[2, 1, 0],
        type=int,
        default=1,
        help=
        "Level of verbosity, from 2 (Chatty) to 0 (Nothing) (default: %(default)s)"
    )

    # Parse the tokens resolved above, so an explicit `args` list is honoured
    # instead of always re-reading sys.argv
    args = parser.parse_args(args)

    # Map the numeric verbosity onto a logging level
    log_levels = {2: logging.DEBUG, 1: logging.INFO, 0: logging.WARNING}
    logger.setLevel(log_levels.get(args.verbose_level, logging.INFO))

    # Print the default config parameters and exit
    if args.default_config:
        json.dump(_default_config(), sys.stdout, indent=2)

    # Print the names of valid pycoQC plotting functions
    elif args.list_plots:
        logger.warning("Available pycoQC methods")
        for method_name in PLOT_METHODS:
            logger.warning("* {}".format(method_name))

    elif not args.summary_file:
        logger.warning("ERROR: `--summary_file` is a required argument")
        parser.print_help()

    elif not args.html_outfile and not args.json_outfile:
        logger.warning(
            "ERROR: At least one output file required `--html_outfile` or `--json_outfile`"
        )
        parser.print_help()

    # Run pycoQC
    else:
        # Initiate pycoQC; the same result object feeds both report writers
        logger.warning("PARSING DATA FILES")
        pycoqc_res = pycoQC(seq_summary_file=args.summary_file,
                            barcode_summary_file=args.barcode_file,
                            verbose_level=args.verbose_level,
                            min_pass_qual=args.min_pass_qual,
                            filter_calibration=args.filter_calibration,
                            min_barcode_percent=args.min_barcode_percent)

        # Run html output function
        if args.html_outfile:
            logger.warning("GENERATING HTML REPORT")
            generate_html_report(pycoqc_res=pycoqc_res,
                                 outfile=args.html_outfile,
                                 config=args.config,
                                 template_file=args.template_file,
                                 title=args.title)

        # Run json output function
        if args.json_outfile:
            logger.warning("GENERATING JSON REPORT")
            generate_json_report(pycoqc_res=pycoqc_res,
                                 outfile=args.json_outfile)