- Size of the reference genome: %s
    - Size of the core genome: %s (%s %% of the reference)
    """ % (ref_size,
           core_size,
           fraction_core)
else:
    core_size = False
    core_str = ""

# Build the MultiQC table through the `report` helper module from
# multiqc_assembly and multiqc_mapping_list (both defined outside this
# excerpt).
multiqc_table = report.get_multiqc_table(multiqc_assembly,
                                         multiqc_mapping_list)

# Build the low-coverage contigs table from the per-sample dictionaries.
# core_genome_size receives core_size, which the preceding else branch sets
# to False.
# NOTE(review): quality_table is called unqualified here while
# get_multiqc_table goes through `report` -- confirm quality_table is
# imported directly into this namespace.
table_lowcoverage_contigs = quality_table(low_cov_fastas,
                                          sample2gc,
                                          sample2median_depth,
                                          sampls2cumulated_size,
                                          sample2n_contigs,
                                          sample2scientific_name,
                                          undetermined_snps_files=undetermined_snp_tables,
                                          core_genome_size=core_size)

snp_heatmap_str = ""
for n, snp_table in enumerate(snp_tables):
    snp_heatmap_str += '''

%s
******************************************************

.. raw:: html

    %s
Example #2
0
    sample2gc[sample] = data_whole_gnome["gc_content"]
    sample2median_depth[sample] = data_whole_gnome["mean_depth"]
    sampls2cumulated_size[sample] = data_whole_gnome["contig_size"]
    sample2n_contigs[sample] = n_contigs
    sampls2cumulated_size_filtered[sample] = int(
        table.query('median_depth>=5 & contig != "TOTAL"')[["contig_size"
                                                            ]].sum())

# Take the "ScientificName" entry of the sample table converted to a dict.
# NOTE(review): presumably sample_table is a pandas DataFrame indexed by
# sample name, which would make this a sample -> scientific name mapping --
# confirm against the Snakemake rule that sets params.sample_table.
sample2scientific_name = snakemake.params["sample_table"].to_dict(
)["ScientificName"]

# Low-coverage contigs table built from the per-sample dictionaries,
# including sampls2cumulated_size_filtered (filled above from contigs with
# median_depth >= 5) and the low_cov_detail value.
table_lowcoverage_contigs = report.quality_table(
    low_cov_fastas,
    sample2gc,
    sample2median_depth,
    sampls2cumulated_size,
    sampls2cumulated_size_filtered,
    sample2n_contigs,
    sample2scientific_name,
    low_cov_detail=low_cov_detail)

# Virulence table built from the virulence reports and BLAST files;
# ordered_samples is passed through to the helper.
table_virulence = report.virulence_table(virulence_reports, blast_files,
                                         ordered_samples)

# Mash table built from mash_results and mash_detail, together with the
# scientific-name mapping.
mash_table = report.get_mash_table(mash_results, mash_detail,
                                   sample2scientific_name)

# Centrifuge table built from centrifuge_tables and the scientific-name
# mapping.
centrifuge_table = report.get_centrifuge_table(centrifuge_tables,
                                               sample2scientific_name)

# Only the assembly MultiQC report is passed here (keyword assembly_multiqc).
multiqc_table = report.get_multiqc_table(assembly_multiqc=multiqc_assembly)
def write_report(output_file, STYLE, SCRIPT, virulence_reports, blast_files,
                 resistance_reports, spanning_tree_core, low_cov_fasta,
                 ete_figure_counts, mlst_tree, snp_table, core_genome_size,
                 reference_genome_size, virulence_table):
    """Render the complete pipeline report as HTML and write it to a file.

    The report is assembled as a reStructuredText document and converted to
    HTML with docutils' ``publish_file``.

    Besides its parameters, this function reads two names that are not
    parameters and must therefore exist in the enclosing module scope:
    ``multiqc_report`` (path of the MultiQC report) and ``ordered_samples``.
    It also calls ``quality_table``, ``resistance_table`` and
    ``plot_heatmap_snps``, which are defined outside this function.

    Args:
        output_file: Path of the HTML file to write.
        STYLE: Markup inserted verbatim into the leading raw-HTML block.
        SCRIPT: Markup inserted verbatim into the leading raw-HTML block.
        virulence_reports: Forwarded to ``virulence_table``.
        blast_files: Forwarded to ``virulence_table``.
        resistance_reports: Forwarded to ``resistance_table``.
        spanning_tree_core: Target of the "MS tree (R)" figure directive.
        low_cov_fasta: Forwarded to ``quality_table``.
        ete_figure_counts: Target of the virulence-factor figure directive.
        mlst_tree: Target of the "Phylogeny + MLST" figure directive.
        snp_table: Forwarded to ``plot_heatmap_snps``.
        core_genome_size: Core genome size; must be convertible to float.
        reference_genome_size: Reference genome size; must be convertible
            to float and non-zero.
        virulence_table: Callable invoked as
            ``virulence_table(virulence_reports, blast_files,
            ordered_samples)``; its result is embedded in the report. This
            parameter shadows any module-level function of the same name.

    Raises:
        ZeroDivisionError: If ``reference_genome_size`` is zero.
        NameError: If ``multiqc_report`` or ``ordered_samples`` is not
            defined in the enclosing scope.
    """
    import io
    from docutils.core import publish_file

    # Drop the first path component of the MultiQC report path so the link
    # is relative; a path without any "/" yields an empty href.
    multiqc_href = multiqc_report.partition('/')[2]
    multiqc_link = '<a href="%s">MultiQC</a>' % multiqc_href

    # The helper calls are kept in their original order in case any of them
    # has side effects.
    table_lowcoverage_contigs = quality_table(low_cov_fasta)
    table_virulence = virulence_table(virulence_reports, blast_files,
                                      ordered_samples)
    table_resistance = resistance_table(resistance_reports)
    snp_heatmap = plot_heatmap_snps(snp_table)

    # Core genome size as a percentage of the reference, two decimals.
    fraction_core = round(
        float(core_genome_size) / float(reference_genome_size) * 100, 2)

    # NOTE(review): every value substituted below a ``.. raw:: html``
    # directive only gets its first line indented by this template; any
    # multi-line value must already carry its own indentation to stay
    # inside the directive -- confirm against the helpers' output.
    report_str = f"""

.. raw:: html

    {SCRIPT}

    {STYLE}

=============================================================
Diag Pipeline - Complete report
=============================================================

.. contents::
    :backlinks: none
    :depth: 2

Quality Control
---------------

MultiQC
*******

MultiQC aggregate results from bioinformatics analyses across many samples into a single report.
The analyses covered here include genome assembly with spades, evaluation of the sequencing
depth by mapping of the reads against the assembly and annotation with prokka.


.. raw:: html

    {multiqc_link}

Low coverage contigs
********************

.. raw:: html

    {table_lowcoverage_contigs}

Typing
------

MLST
*****

The *S. aureus* MLST scheme is based on the sequence of the following seven house-keeping genes:

1. arcC (Carbamate kinase)
2. aroE (Shikimate dehydrogenase)
3. glpF (Glycerol kinase)
4. gmk (Guanylate kinase)
5. pta (Phosphate acetyltransferase)
6. tpi (Triosephosphate isomerase)
7. yqi (Acetyl coenzyme A acetyltransferase)

The MLST was determined using the mlst_ software based on PubMLST_ typing schemes.

.. _PubMLST: https://pubmlst.org/
.. _mlst: https://github.com/tseemann/mlst

Phylogeny + MLST
****************

.. figure:: {mlst_tree}
   :alt: MST tree
   :width: 40%

   MLST as determined by T. Seemann mlst_.

MS tree (R)
*********************

- Size of the reference genome: {reference_genome_size}
- Size of the core genome: {core_genome_size} ({fraction_core} % of the reference)


.. figure:: {spanning_tree_core}
   :alt: MST tree
   :width: 80%

   Minimum spanning tree including all samples as well as the reference genome.

MS tree (js)
***********************

.. raw:: html

    <div id="cy" style="width:80%;height:700px; position: relative; border: 2px solid #212523"></div>

SNP table
***********

.. raw:: html

    {snp_heatmap}

Virulence (VFDB)
-----------------

Overview
*********

The identification of virulence factors was performed with BLAST. Only hits exhibiting more
than 80% amino acid identity to a known virulence factor from the VFDB database are considered.

.. figure:: {ete_figure_counts}
   :alt: VF tree
   :width: 50%

   Number of identified VFs in each genome.

Details
********

.. raw:: html

    {table_virulence}

Resistance (RGI/CARD)
----------------------

.. raw:: html

    {table_resistance}

"""
    # Explicit UTF-8 keeps the written HTML independent of the platform's
    # locale encoding.
    with open(output_file, "w", encoding="utf-8") as fh:
        publish_file(
            source=io.StringIO(report_str),
            destination=fh,
            writer_name="html",
            # An empty stylesheet_path is passed, as in the original.
            settings_overrides={"stylesheet_path": ""},
        )