Exemple #1
0
    def plot_read_length_distribution(self, data):
        """Build a styled histogram from binned length counts.

        ``data['read_lengths']`` is walked as a sequence of integer counts;
        the entry at position ``k`` contributes that many values of
        ``k * 50`` to the histogram. ``data['read_n50']`` is truncated to an
        integer and passed on to ``add_plot_title``.

        :param data: mapping providing ``read_lengths`` and ``read_n50``; it
            is also forwarded unchanged to ``add_plot_title``.
        :returns: the plot object produced by ``hist.histogram``.
        """
        step = 50
        # Expand the per-position counts into one plotted value per count.
        expanded = [
            position * step
            for position, n_items in enumerate(data['read_lengths'])
            for _ in range(n_items)
        ]
        # Values are never negative, so 0 is the right floor for empty input.
        longest = max(expanded, default=0)

        plot = hist.histogram(
            [expanded],
            bins=1000,
            height=300,
            width=400,
            xlim=(0, longest + 200),
            colors=[Colors.light_cornflower_blue],
            x_axis_label='Length',
            y_axis_label='Count',
        )

        title_suffix = f"Read n50: {int(data['read_n50'])}"
        self.style_plot(plot)
        self.add_plot_title(plot, data, title_suffix)
        return plot
Exemple #2
0
    def plot_coverage_distribution(self, data):
        """Build a styled histogram from binned coverage counts.

        ``data['alignment_coverages']`` is walked as a sequence of integer
        counts; the entry at position ``k`` contributes that many values of
        ``k`` to the histogram, which is drawn with 101 bins over 0-100.

        :param data: mapping providing ``alignment_coverages``,
            ``cov80_percent`` and ``cov80_count``; it is also forwarded
            unchanged to ``add_plot_title``.
        :returns: the plot object produced by ``hist.histogram``.
        """
        # Expand the per-position counts into one plotted value per count.
        expanded = [
            position
            for position, n_items in enumerate(data['alignment_coverages'])
            for _ in range(n_items)
        ]

        plot = hist.histogram(
            [expanded],
            bins=101,
            height=300,
            width=400,
            xlim=(0, 100),
            colors=[Colors.light_cornflower_blue],
            x_axis_label='Coverage %',
            y_axis_label='Count',
        )

        rounded_percent = round(data['cov80_percent'], 2)
        title_suffix = f"Cov80: {data['cov80_count']} ({rounded_percent}%)"
        self.style_plot(plot)
        self.add_plot_title(plot, data, title_suffix)
        return plot
Exemple #3
0
def plot_qscore_distribution(fname, mean, data):
    """Create a histogram of the ``mean_quality`` values in ``data``.

    :param fname: accepted for interface compatibility; not used here.
    :param mean: value interpolated into the plot title.
    :param data: object whose ``mean_quality`` entry provides ``tolist()``
        (presumably a pandas DataFrame -- confirm against callers).
    :returns: the plot object produced by ``hist.histogram``.
    """
    qualities = data['mean_quality'].tolist()
    plot_title = f"Mean Q-score: {mean}"
    # The number of bins is left to hist.histogram's default.
    return hist.histogram(
        [qualities],
        height=300,
        width=400,
        xlim=(0, 30),
        colors=[Colors.light_cornflower_blue],
        x_axis_label='Mean Quality',
        y_axis_label='Count',
        title=plot_title,
    )
Exemple #4
0
def plot_read_length_distribution(fname, nseqs, nbases, minl, maxl, data):
    """Create a histogram of the ``read_length`` values in ``data``.

    :param fname: accepted for interface compatibility; not used here.
    :param nseqs: value shown as the number of reads in the title.
    :param nbases: base count; floor-divided by 1000 for the title.
    :param minl: value shown as the minimum in the title.
    :param maxl: value shown as the maximum in the title.
    :param data: object whose ``read_length`` entry provides ``tolist()``
        (presumably a pandas DataFrame -- confirm against callers).
    :returns: the plot object produced by ``hist.histogram``.
    """
    kilobases = int(nbases // 1000)
    read_lengths = data['read_length']
    # Leave 200 units of headroom to the right of the longest value.
    upper_limit = max(read_lengths) + 200
    plot_title = (
        f"{nseqs} reads, {kilobases} Kb total, {minl} min, {maxl} max")
    # The number of bins is left to hist.histogram's default.
    return hist.histogram(
        [read_lengths.tolist()],
        height=300,
        width=400,
        xlim=(0, upper_limit),
        colors=[Colors.light_cornflower_blue],
        x_axis_label='Length',
        y_axis_label='Count',
        title=plot_title,
    )
Exemple #5
0
    def plot_read_length_distribution(self, fname, nseqs, nbases, minl, maxl,
                                      data):
        """Create a styled histogram of the ``read_length`` values in ``data``.

        :param fname: label placed at the start of the plot title.
        :param nseqs: value shown as the sequence count in the title.
        :param nbases: value shown as the base count in the title.
        :param minl: value shown as the minimum in the title.
        :param maxl: value shown as the maximum in the title.
        :param data: object whose ``read_length`` entry provides ``tolist()``
            (presumably a pandas DataFrame -- confirm against callers).
        :returns: the plot object produced by ``hist.histogram`` after
            ``style_plot`` has been applied to it.
        """
        read_lengths = data['read_length']
        # Leave 200 units of headroom to the right of the longest value.
        upper_limit = max(read_lengths) + 200
        plot_title = (
            f"{fname}: {nseqs} seqs, {nbases} bp, {minl} min, {maxl} max")
        # The number of bins is left to hist.histogram's default.
        plot = hist.histogram(
            [read_lengths.tolist()],
            height=300,
            width=400,
            xlim=(0, upper_limit),
            colors=[Colors.light_cornflower_blue],
            x_axis_label='Length',
            y_axis_label='Count',
            title=plot_title,
        )

        self.style_plot(plot)
        return plot
Exemple #6
0
def read_quality_plot(seq_summary):
    """Create a read quality histogram annotated with mean and median.

    :param seq_summary: summary data from fastcat; its ``mean_quality``
        entry is plotted.
    :returns: the histogram after ``annot.subtitle`` has been applied.
    """
    qualities = seq_summary['mean_quality']
    mean_q = np.mean(qualities)
    median_q = np.median(qualities)
    plot = hist.histogram(
        [qualities],
        colors=[Colors.cerulean],
        bins=100,
        title="Read quality score",
        x_axis_label="Quality score",
        y_axis_label="Number of reads",
        xlim=(4, 25),
    )
    # Both statistics are shown rounded to whole numbers.
    subtitle = "Mean: {:.0f}. Median: {:.0f}".format(mean_q, median_q)
    return annot.subtitle(plot, subtitle)
Exemple #7
0
    def plot_accuracy_distribution(self, data):
        """Build a styled histogram from binned accuracy counts.

        ``data['alignment_accuracies']`` is walked as a sequence of integer
        counts; the entry at position ``k`` contributes that many values of
        ``k / 10`` to the histogram.

        :param data: mapping providing ``alignment_accuracies`` and
            ``median_accuracy``; it is also forwarded unchanged to
            ``add_plot_title``.
        :returns: the plot object produced by ``hist.histogram``.
        """
        # Expand the per-position counts into one plotted value per count.
        expanded = [
            position / 10
            for position, n_items in enumerate(data['alignment_accuracies'])
            for _ in range(n_items)
        ]

        plot = hist.histogram(
            [expanded],
            bins=100,
            height=300,
            width=400,
            xlim=(0, 100),
            colors=[Colors.cerulean],
            x_axis_label='Accuracy %',
            y_axis_label='Number of reads',
        )

        title_suffix = f"Median: {data['median_accuracy']}%"
        self.style_plot(plot)
        self.add_plot_title(plot, data, title_suffix)
        return plot
Exemple #8
0
    def plot_qscore_distribution(self, data):
        """Build a styled histogram from binned quality counts.

        ``data['aligned_qualities']`` is walked as a sequence of integer
        counts; the entry at position ``k`` contributes that many values of
        ``k / 10`` to the histogram.

        :param data: mapping providing ``aligned_qualities`` and
            ``median_quality``; it is also forwarded unchanged to
            ``add_plot_title``.
        :returns: the plot object produced by ``hist.histogram``.
        """
        # Expand the per-position counts into one plotted value per count.
        expanded = [
            position / 10
            for position, n_items in enumerate(data['aligned_qualities'])
            for _ in range(n_items)
        ]

        plot = hist.histogram(
            [expanded],
            bins=600,
            height=300,
            width=400,
            xlim=(0, 30),
            colors=[Colors.light_cornflower_blue],
            x_axis_label='Avg Quality',
            y_axis_label='Count',
        )

        title_suffix = f"Median: {int(data['median_quality'])}"
        self.style_plot(plot)
        self.add_plot_title(plot, data, title_suffix)
        return plot
Exemple #9
0
def indel_lengths(bcf_stats,
                  header=_indel_length_header,
                  report=None,
                  color=Colors.light_cornflower_blue):
    """Create a report section containing an indel length chart.

    :param bcf_stats: one or more outputs from `bcftools stats`.
    :param header: a markdown formatted header.
    :param report: an HTMLSection instance.
    :param color: color of bars in chart.

    :returns: an HTMLSection instance, if `report` was provided the given
        instance is modified and returned.
    """
    report = _maybe_new_report(report)
    report.markdown(header)
    try:
        df = bcf_stats['IDD']
    except KeyError:
        # If there are no indels, bcftools doesn't contain the table
        report.markdown("*No indels to report.*")
        return report

    # Work on a fresh frame so the caller's `bcf_stats['IDD']` table is not
    # modified as a side effect of building the chart.
    lengths = pd.DataFrame({
        'nlength': df['length (deletions negative)'].astype(int),
        'count': df['number of sites'].astype(int),
    })
    # pad just to pull out axes by a minimum
    pad = pd.DataFrame({'nlength': [-10, +10], 'count': [0, 0]})
    grouped = lengths.groupby('nlength') \
        .agg(count=pd.NamedAgg(column='count', aggfunc='sum')) \
        .reset_index()
    # DataFrame.append was removed in pandas 2.0; concat is the supported
    # equivalent and behaves the same on older versions.
    counts = pd.concat([grouped, pad])
    plot = hist.histogram(
        [counts['nlength']],
        weights=[counts['count']],
        colors=[color],
        binwidth=1,
        title='Insertion and deletion variant lengths',
        x_axis_label='Length / bases (deletions negative)',
        y_axis_label='Count')
    report.plot(plot)
    return report
Exemple #10
0
def read_length_plot(seq_summary, min_len=None, max_len=None):
    """Create a read length histogram with optional length markers.

    :param seq_summary: summary data from fastcat; its ``read_length``
        entry is plotted.
    :param min_len: minimum length; drawn as a grey vertical marker when
        given.
    :param max_len: maximum length; drawn as a vertical marker when given.

    The minimum and maximum lengths only annotate the plot, the data is
    not filtered by them.

    :returns: the annotated histogram.
    """
    read_lengths = seq_summary['read_length']
    mean_length = read_lengths.sum() / len(seq_summary)
    median_length = np.median(read_lengths)

    plot = hist.histogram(
        [read_lengths],
        colors=[Colors.cerulean],
        bins=100,
        title="Read length distribution.",
        x_axis_label='Read Length / bases',
        y_axis_label='Number of reads',
        xlim=(0, 2000),
    )

    if min_len is not None:
        plot = annot.marker_vline(
            plot, min_len,
            label="Min: {}".format(min_len),
            text_baseline='bottom',
            color='grey')
    if max_len is not None:
        plot = annot.marker_vline(
            plot, max_len,
            label="Max: {}".format(max_len),
            text_baseline='top')

    # Both statistics are shown rounded to whole numbers.
    subtitle = "Mean: {:.0f}. Median: {:.0f}".format(
        mean_length, median_length)
    return annot.subtitle(plot, subtitle)
Exemple #11
0
# (The main HTMLReport is logically the first section).
# Request an extra section under an explicit key; the returned object is
# kept as `gallery`.
logger.info("Adding gallery")
gallery = report.add_section(key="additional_section")

# Adding a plot
# The second argument is the first one reversed; the plot is stored under
# the key "line_plot".
logger.info("Adding points plot")
report.plot(points.points(sorted_xy, sorted_xy[::-1]), key="line_plot")

# Using placeholder is more explicit, and checks will be made before
# rendering that the item has been assigned a real value
report.placeholder("histogram preamble")

# There's no need to provide key (unless require_keys is set). Items added
# without a key cannot be replaced however.
# Two series are drawn (x shifted by -1 in red, y shifted by +1 in green),
# each with a vertical marker at its shifted mean.
logger.info("Adding histogram")
h = hist.histogram([x - 1, y + 1], colors=['red', 'green'])
h = annot.marker_vline(h, np.mean(x) - 1, label='x values - 1', color='red', text_baseline='bottom')
h = annot.marker_vline(h, np.mean(y) + 1, label='y values + 1', color='green', text_baseline='top')
report.plot(h)

# To delete an item, just delete the key
logger.info("Deleting an item for fun")
report.markdown("Garbage", key='garbage')
del report['garbage']

# Add more plots
logger.info("Adding heatmap")
report.placeholder("heatmap preamble")
report.plot(spatial.heatmap2(x, y))

# Add a data table
Exemple #12
0
def main():
    """Run entry point.

    Parse the command line and assemble the wf-artic HTML report section by
    section: read quality control, ARTIC analysis status, then (depending on
    the options given) genome coverage, bcftools variant statistics,
    Nextclade, pangolin lineage and genotyping, followed by software
    versions and workflow parameters. The report is written to the
    ``output`` path given on the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("status", help="artic status file")
    parser.add_argument("output", help="Report output filename")
    parser.add_argument("--nextclade", help="nextclade json output file")
    parser.add_argument("--pangolin", help="pangolin CSV output file")
    parser.add_argument("--depths",
                        nargs='+',
                        required=True,
                        help="Depth summary files")
    parser.add_argument("--summaries",
                        nargs='+',
                        required=True,
                        help="Sequencing summary files")
    parser.add_argument("--bcftools_stats",
                        nargs='+',
                        required=True,
                        help="Outputs from bcftools stats")
    parser.add_argument("--genotypes",
                        nargs='+',
                        required=False,
                        help="Genotyping summary files")
    parser.add_argument("--min_cover",
                        default=20,
                        type=int,
                        help="Minimum locus coverage for variant call.")
    parser.add_argument("--min_len",
                        default=300,
                        type=int,
                        help="Minimum read length")
    parser.add_argument("--max_len",
                        default=700,
                        type=int,
                        help="Maximum read length")
    # argparse applies %-formatting to help strings, so a literal percent
    # sign has to be written as "%%" or rendering --help raises.
    parser.add_argument(
        "--report_depth",
        default=100,
        type=int,
        help=("Depth at which to provide a coverage statistics, "
              "e.g. 76%% of genome covered at `report_depth`"))
    parser.add_argument("--hide_coverage",
                        action="store_true",
                        help="Do not display coverage plots in report.")
    parser.add_argument("--hide_variants",
                        action="store_true",
                        help="Do not display variant summary in report.")
    parser.add_argument("--revision",
                        default='unknown',
                        help="git branch/tag of the executed workflow")
    parser.add_argument("--commit",
                        default='unknown',
                        help="git commit of the executed workflow")
    parser.add_argument("--params",
                        default=None,
                        help="A csv containing the parameter key/values")
    parser.add_argument(
        "--versions", help="directory contained CSVs containing name,version.")
    args = parser.parse_args()

    report_doc = report.WFReport("SARS-CoV-2 ARTIC Sequencing report",
                                 "wf-artic",
                                 revision=args.revision,
                                 commit=args.commit)

    section = report_doc.add_section()
    section.markdown('''
### Read Quality control

This section displays basic QC metrics indicating read data quality.
''')
    # read length summary
    seq_summary = read_files(args.summaries)
    total_bases = seq_summary['read_length'].sum()
    mean_length = total_bases / len(seq_summary)
    median_length = np.median(seq_summary['read_length'])
    datas = [seq_summary['read_length']]
    length_hist = hist.histogram(datas,
                                 colors=[Colors.cerulean],
                                 binwidth=50,
                                 title="Read length distribution.",
                                 x_axis_label='Read Length / bases',
                                 y_axis_label='Number of reads',
                                 xlim=(0, 2000))
    length_hist = annot.marker_vline(length_hist,
                                     args.min_len,
                                     label="Min: {}".format(args.min_len),
                                     text_baseline='bottom',
                                     color='grey')
    length_hist = annot.marker_vline(length_hist,
                                     args.max_len,
                                     label="Max: {}".format(args.max_len),
                                     text_baseline='top')
    length_hist = annot.subtitle(
        length_hist,
        "Mean: {:.0f}. Median: {:.0f}".format(mean_length, median_length))

    # read quality summary
    datas = [seq_summary['mean_quality']]
    mean_q, median_q = np.mean(datas[0]), np.median(datas[0])
    q_hist = hist.histogram(datas,
                            colors=[Colors.cerulean],
                            bins=100,
                            title="Read quality score",
                            x_axis_label="Quality score",
                            y_axis_label="Number of reads",
                            xlim=(4, 25))
    q_hist = annot.subtitle(
        q_hist, "Mean: {:.0f}. Median: {:.0f}".format(mean_q, median_q))

    # barcode count plot
    good_reads = seq_summary.loc[(seq_summary['read_length'] > args.min_len)
                                 & (seq_summary['read_length'] < args.max_len)]
    # Build the table explicitly instead of relying on the column names that
    # value_counts()/reset_index() generate: those names changed in pandas
    # 2.0, which left the renamed frame without a 'sample' column.
    per_sample = good_reads['sample_name'].value_counts().sort_index()
    barcode_counts = pd.DataFrame({
        'sample': per_sample.index,
        'count': per_sample.values,
    })

    bc_counts = bars.simple_bar(barcode_counts['sample'].astype(str),
                                barcode_counts['count'],
                                colors=[Colors.cerulean] * len(barcode_counts),
                                title=('Number of reads per barcode '
                                       '(filtered by {} < length < {})'.format(
                                           args.min_len, args.max_len)),
                                plot_width=None)
    # rotate the sample labels by (approximately) a quarter turn
    bc_counts.xaxis.major_label_orientation = 3.14 / 2
    section.plot(
        layout([[length_hist, q_hist], [bc_counts]],
               sizing_mode="stretch_width"))

    section = report_doc.add_section()
    section.markdown("""
### Artic Analysis status

The panel below lists samples which failed to produce
results from the primary ARTIC analysis. Samples not listed here were analysed
successfully, but may still contain inconclusive or invalid results. See the
following sections for further indications of failed or inconclusive results.
""")
    status = pd.read_csv(args.status, sep='\t')
    failed = status.loc[status['pass'] == 0]
    if len(failed) == 0:
        fail_list = "All samples analysed successfully"
    else:
        fail_list = failed['sample'].str.cat(sep=', ')
    section.markdown("""
```{}```
""".format(fail_list))
    # An empty status table has no failures; avoid dividing by zero.
    if len(status) == 0:
        fail_percentage = 0
    else:
        fail_percentage = int(100 * len(failed) / len(status))
    classes = ['Success', 'Analysis Failed']
    values = [100 - fail_percentage, fail_percentage]
    colors = ['#54B8B1', '#EF4135']
    plot = bars.single_hbar(values,
                            classes,
                            colors,
                            title="Completed analyses",
                            x_axis_label="%age Samples")
    plot.x_range = Range1d(0, 140)
    section.plot(plot)

    if not args.hide_coverage:
        section = report_doc.add_section()
        section.markdown('''
### Genome coverage

Plots below indicate depth of coverage from data used
within the Artic analysis coloured by amplicon pool.  Variant filtering during
the ARTIC analysis mandates a minimum coverage of at least {}X at
variant/genotyping loci for a call to be made.

***NB: To better display all possible data, the depth axes of the plots below
are not tied between plots for different samples. Care should be taken in
comparing depth across samples.***
'''.format(args.min_cover))

        # depth summary by amplicon pool
        df = read_files(args.depths)
        plots_pool = list()
        plots_orient = list()
        plots_combined = list()
        depth_lim = args.report_depth
        for sample in sorted(df['sample_name'].unique()):
            bc = df['sample_name'] == sample
            # sum the rows sharing a position to get one row per position
            depth = df[bc].groupby('pos').sum().reset_index()
            # percentage of positions at or above the reporting depth
            depth_thresh = \
                100*(depth['depth'] >= depth_lim).sum() / len(depth['depth'])
            depth_mean = depth['depth'].mean()

            # total depth plot
            # plot line just to get aplanat niceities
            p = lines.line(
                [depth['pos']],
                [depth['depth']],
                colors=[Colors.cerulean],
                title="{}: {:.0f}X, {:.1f}% > {}X".format(
                    sample, depth_mean, depth_thresh, depth_lim),
                height=250,
                width=400,
                x_axis_label='position',
                y_axis_label='depth',
            )
            p.varea(x=depth['pos'],
                    y1=0.1,
                    y2=depth['depth'],
                    fill_color=Colors.cerulean)
            plots_combined.append(p)

            # fwd/rev
            xs = [depth['pos'], depth['pos']]
            ys = [depth['depth_fwd'], depth['depth_rev']]
            names = ['fwd', 'rev']
            colors = [Colors.dark_gray, Colors.verdigris]

            p = lines.line(xs,
                           ys,
                           colors=colors,
                           names=names,
                           title="{}: {:.0f}X, {:.1f}% > {}X".format(
                               sample, depth_mean, depth_thresh, depth_lim),
                           height=250,
                           width=400,
                           x_axis_label='position',
                           y_axis_label='depth')
            for x, y, name, color in zip(xs, ys, names, colors):
                p.varea(x=x,
                        y1=0,
                        y2=y,
                        legend_label=name,
                        fill_color=color,
                        alpha=0.7,
                        muted_color=color,
                        muted_alpha=0.2)
            p.legend.click_policy = 'mute'
            plots_orient.append(p)

            # primer set plot
            pset = df['primer_set']
            xs = [df.loc[(pset == i) & bc]['pos'] for i in (1, 2)]
            ys = [df.loc[(pset == i) & bc]['depth'] for i in (1, 2)]
            names = ['pool-1', 'pool-2']
            colors = [Colors.light_cornflower_blue, Colors.feldgrau]

            p = lines.line(xs,
                           ys,
                           colors=colors,
                           names=names,
                           title="{}: {:.0f}X, {:.1f}% > {}X".format(
                               sample, depth_mean, depth_thresh, depth_lim),
                           height=250,
                           width=400,
                           x_axis_label='position',
                           y_axis_label='depth')
            for x, y, name, color in zip(xs, ys, names, colors):
                p.varea(x=x,
                        y1=0,
                        y2=y,
                        legend_label=name,
                        fill_color=color,
                        alpha=0.7,
                        muted_color=color,
                        muted_alpha=0.2)
            p.legend.click_policy = 'mute'
            plots_pool.append(p)

        tab1 = Panel(child=gridplot(plots_combined, ncols=3),
                     title="Coverage Plot")
        tab2 = Panel(child=gridplot(plots_pool, ncols=3),
                     title="By amplicon pool")
        tab3 = Panel(child=gridplot(plots_orient, ncols=3),
                     title="By read orientation")
        cover_panel = Tabs(tabs=[tab1, tab2, tab3])
        section.plot(cover_panel)

    # canned VCF stats report component
    if not args.hide_variants:
        section = report_doc.add_section()
        bcfstats.full_report(args.bcftools_stats, report=section)

    # NextClade analysis
    if args.nextclade is not None:
        section = report_doc.add_section(
            section=nextclade.NextClade(args.nextclade))
        section.markdown(
            "*Note: For targeted sequencing, such as SpikeSeq, Nextclade "
            "may report 'Missing data' QC fails. This is expected and not "
            "a concern provided the regions of interest are not reported "
            "as missing.*")

    # Pangolin analysis
    if args.pangolin is not None:
        section = report_doc.add_section()
        section.markdown('''
### Lineage

The table below reports the lineage of each sample as calculated by
[pangolin](https://github.com/cov-lineages/pangolin).

''')
        section.table(pd.read_csv(args.pangolin), index=False)

    # Genotyping
    if args.genotypes is not None:
        section = report_doc.add_section()
        section.markdown('''
### Genotyping

The table below lists whether candidate variants were determined to exist
within each sample.

The ARTIC workflow pre-filters (removes) candidate variants according to the
criteria `variant_score < 20` and `coverage < 20`. The table draws attention to
reference calls of low coverage (<20 reads) which may therefore be inaccurate.
''')
        df = read_files(args.genotypes, sep=',')
        df = df[[
            'Sample', 'Result', 'Date Tested', 'Lab ID', 'testKit',
            'CH1-Target', 'CH1-Result', 'CH1-Conf'
        ]]
        df = df.sort_values(by=['Sample', 'CH1-Target'], ascending=True)
        section.table(df, index=False)

    section = report_doc.add_section()
    section.markdown('''
### Software versions

The table below highlights versions of key software used within the analysis.
''')
    versions = list()
    if args.versions is not None:
        for fname in os.listdir(args.versions):
            print("Reading versions from file:", fname)
            try:
                with open(os.path.join(args.versions, fname), 'r') as fh:
                    for line in fh:
                        name, version = line.strip().split(',')
                        versions.append((name, version))
            except (OSError, ValueError) as e:
                # Best effort: an unreadable file or a line that is not
                # exactly "name,version" is reported and the rest of that
                # file is skipped; the report is still produced.
                print(e)
    versions = pd.DataFrame(versions, columns=('Name', 'Version'))
    section.table(versions, index=False)

    # Params reporting
    section = report_doc.add_section()
    section.markdown('''
### Workflow parameters

The table below highlights values of the main parameters used in this analysis.
''')
    df_params = load_params(args.params)
    section.table(df_params, index=False)

    # write report
    report_doc.write(args.output)
Exemple #13
0
def main():
    """Run entry point.

    Parse the command line and assemble the wf-hap-snp HTML report: read
    quality control histograms, per-reference coverage plots, the canned
    bcftools statistics section and a footer. The report is written to the
    ``output`` path given on the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("depth", help="Depth summary file.")
    parser.add_argument("summary", help="Read statistics summary file.")
    parser.add_argument("align_summary", help="Align statistics summary file.")
    parser.add_argument("bcf_stats", help="Output of bcftools stats")
    parser.add_argument("output", help="Report output filename")
    args = parser.parse_args()

    report_doc = report.HTMLReport(
        "Haploid variant calling Summary Report",
        "Results generated through the wf-hap-snp nextflow workflow provided by Oxford Nanopore Technologies"
    )

    section = report_doc.add_section()
    section.markdown('''
### Read Quality control
This section displays basic QC metrics indicating read data quality.
''')

    # read length summary
    seq_summary = pd.read_csv(args.summary, sep='\t')
    total_bases = seq_summary['read_length'].sum()
    mean_length = total_bases / len(seq_summary)
    median_length = np.median(seq_summary['read_length'])
    datas = [seq_summary['read_length']]
    length_hist = hist.histogram(datas,
                                 colors=[Colors.cerulean],
                                 bins=100,
                                 title="Read length distribution.",
                                 x_axis_label='Read Length / bases',
                                 y_axis_label='Number of reads',
                                 xlim=(0, None))
    length_hist = annot.subtitle(
        length_hist,
        "Mean: {:.0f}. Median: {:.0f}".format(mean_length, median_length))

    # read quality summary, taken from the 'acc' column
    datas = [seq_summary['acc']]
    mean_q, median_q = np.mean(datas[0]), np.median(datas[0])
    q_hist = hist.histogram(datas,
                            colors=[Colors.cerulean],
                            bins=100,
                            title="Read quality (wrt reference sequence)",
                            x_axis_label="Read Quality",
                            y_axis_label="Number of reads",
                            xlim=(85, 100))
    q_hist = annot.subtitle(
        q_hist, "Mean: {:.0f}. Median: {:.0f}".format(mean_q, median_q))

    section.plot(gridplot([[length_hist, q_hist]]))

    section = report_doc.add_section()
    section.markdown('''
### Genome coverage
Plots below indicate depth of coverage of the coloured by amplicon pool.
For adequate variant calling depth should be at least 50X in any region.
Forward reads are shown in light-blue, reverse reads are dark grey.
''')
    df = pd.read_csv(args.depth, sep='\t')
    plots_orient = list()
    plots_cover = list()
    depth_lim = 50
    for sample in sorted(df['rname'].unique()):
        bc = df['rname'] == sample
        depth = df[bc].groupby('pos')['depth'].sum()
        # percentage of positions at or above the depth threshold
        depth_thresh = 100 * (depth >= depth_lim).sum() / len(depth)

        # fwd/rev
        # reset_index turns 'pos' back into a regular column, which is read
        # as data['pos'] just below.
        data = df[bc].groupby('pos').sum().reset_index()
        xs = [data['pos'], data['pos']]
        ys = [data['depth_fwd'], data['depth_rev']]
        plot = points.points(
            xs,
            ys,
            colors=[Colors.light_cornflower_blue, Colors.feldgrau],
            title="{}: {:.0f}X, {:.1f}% > {}X".format(sample, depth.mean(),
                                                      depth_thresh, depth_lim),
            height=300,
            width=800,
            x_axis_label='position',
            y_axis_label='depth',
            ylim=(0, 300))
        plots_orient.append(plot)

        # cumulative coverage
        # depths sorted from highest to lowest against an evenly spaced
        # 1-100 scale with one point per position
        xs = [data['depth'].sort_values(ascending=False)]
        ys = [np.linspace(1, 100, len(data))]
        plot = lines.line(xs,
                          ys,
                          colors=[Colors.light_cornflower_blue],
                          title="{}: {:.0f}X, {:.1f}% > {}X".format(
                              sample, depth.mean(), depth_thresh, depth_lim),
                          height=300,
                          width=800,
                          x_axis_label='Coverage / bases',
                          y_axis_label='%age of reference')
        plots_cover.append(plot)

    tab1 = Panel(child=gridplot(plots_orient, ncols=1),
                 title="Coverage traces")
    tab2 = Panel(child=gridplot(plots_cover, ncols=1),
                 title="Proportions covered")
    cover_panel = Tabs(tabs=[tab1, tab2])
    section.plot(cover_panel)

    # canned VCF stats report component
    section = report_doc.add_section()
    bcfstats.full_report(args.bcf_stats, report=section)

    # Footer section
    section = report_doc.add_section()
    section.markdown('''
### About

**Oxford Nanopore Technologies products are not intended for use for health assessment
or to diagnose, treat, mitigate, cure or prevent any disease or condition.**

This report was produced using the [epi2me-labs/wf-hap-snp](https://github.com/epi2me-labs/wf-hap-snp).
The workflow can be run using `nextflow epi2me-labs/wf-hap-snp --help`

---
''')

    # write report
    # Honour the filename given on the command line rather than a fixed
    # "summary_report.html" path.
    report_doc.write(args.output)