core_genome_size=core_size) snp_heatmap_str = "" for n, snp_table in enumerate(snp_tables): snp_heatmap_str += ''' %s ****************************************************** .. raw:: html %s ''' % (snp_table.split("/")[2], plot_heatmap_snps(snp_table, id=snp_table.split("/")[2] )) spanning_tree_str = "" for n, tree in enumerate(spanning_trees): tree_path = '/'.join(tree.split('/')[1:]) spanning_tree_str += """ %s *************************************************************** .. figure:: %s :alt: %s :width: 60%% """ % (tree.split("/")[3], tree_path, tree.split("/")[3]) report_str = f"""
def write_report(output_file, STYLE, SCRIPT, virulence_reports, blast_files,
                 resistance_reports, spanning_tree_core, low_cov_fasta,
                 ete_figure_counts, mlst_tree, snp_table, core_genome_size,
                 reference_genome_size, virulence_table):
    """Build the complete pipeline report as reStructuredText and render it to HTML.

    The report text is assembled from an RST template and converted with
    docutils' ``publish_file``; the resulting HTML is written to *output_file*.

    Args:
        output_file: Path of the HTML file to write.
        STYLE: Text inserted verbatim into the leading ``raw:: html`` block.
        SCRIPT: Text inserted verbatim into the leading ``raw:: html`` block.
        virulence_reports: Passed as first argument to *virulence_table*.
        blast_files: Passed as second argument to *virulence_table*.
        resistance_reports: Passed to ``resistance_table``.
        spanning_tree_core: Target of the "MS tree (R)" figure directive.
        low_cov_fasta: Passed to ``quality_table``.
        ete_figure_counts: Target of the virulence overview figure directive.
        mlst_tree: Target of the "Phylogeny + MLST" figure directive.
        snp_table: Passed to ``plot_heatmap_snps``.
        core_genome_size: Core genome size; must be convertible to ``float``.
        reference_genome_size: Reference genome size; must be convertible to
            ``float`` and non-zero.
        virulence_table: Callable invoked as
            ``virulence_table(virulence_reports, blast_files, ordered_samples)``;
            its result is embedded in the "Details" section.

    Raises:
        ZeroDivisionError: If *reference_genome_size* converts to zero.
        ValueError: If either genome size cannot be converted to ``float``.

    Note:
        ``multiqc_report`` and ``ordered_samples`` are not parameters; they are
        read from the enclosing/module scope, as are ``quality_table``,
        ``resistance_table`` and ``plot_heatmap_snps``.
        NOTE(review): the values interpolated into ``raw:: html`` blocks are
        presumably HTML snippets whose layout is compatible with the directive
        indentation -- confirm against the helpers that produce them.
    """
    import io

    from docutils.core import publish_file

    # Drop the first path component so the link is relative to the report.
    multiqc_href = "/".join(multiqc_report.split("/")[1:])
    multiqc_link = '<a href="%s">MultiQC</a>' % multiqc_href

    table_lowcoverage_contigs = quality_table(low_cov_fasta)
    table_virulence = virulence_table(virulence_reports, blast_files,
                                      ordered_samples)
    table_resistance = resistance_table(resistance_reports)
    snp_heatmap = plot_heatmap_snps(snp_table)

    # Percentage of the reference genome covered by the core genome.
    fraction_core = round(
        float(core_genome_size) / float(reference_genome_size) * 100, 2)

    # The template is kept at column 0: RST is indentation-sensitive, so
    # indenting it to match the function body would change its meaning.
    report_str = f"""

.. raw:: html

    {SCRIPT}
    {STYLE}

=============================================================
Diag Pipeline - Complete report
=============================================================

.. contents::
    :backlinks: none
    :depth: 2

Quality Control
---------------

MultiQC
*******

MultiQC aggregate results from bioinformatics analyses across many samples into a single report. The analyses covered here include genome assembly with spades, evaluation of the sequencing depth by mapping of the reads against the assembly and annotation with prokka.

.. raw:: html

    {multiqc_link}

Low coverage contigs
********************

.. raw:: html

    {table_lowcoverage_contigs}

Typing
------

MLST
*****

The *S. aureus* MLST scheme is based on the sequence of the following seven house-keeping genes:

1. arcC (Carbamate kinase)
2. aroE (Shikimate dehydrogenase)
3. glpF (Glycerol kinase)
4. gmk (Guanylate kinase)
5. pta (Phosphate acetyltransferase)
6. tpi (Triosephosphate isomerase)
7. yqi (Acetyl coenzyme A acetyltransferase)

The MLST was determined using the mlst_ software based on PubMLST_ typing schemes.

.. _PubMLST: https://pubmlst.org/
.. _mlst: https://github.com/tseemann/mlst

Phylogeny + MLST
****************

.. figure:: {mlst_tree}
   :alt: MST tree
   :width: 40%

   MLST as determined by T. Seemann mlst_.

MS tree (R)
*********************

- Size of the reference genome: {reference_genome_size}
- Size of the core genome: {core_genome_size} ({fraction_core} % of the reference)

.. figure:: {spanning_tree_core}
   :alt: MST tree
   :width: 80%

   Minimum spanning tree including all samples as well as the reference genome.

MS tree (js)
***********************

.. raw:: html

    <div id="cy" style="width:80%;height:700px; position: relative; border: 2px solid #212523"></div>

SNP table
***********

.. raw:: html

    {snp_heatmap}

Virulence (VFDB)
-----------------

Overview
*********

The identification of virulence factors was performed with BLAST. Only hits exhibiting more than 80% amino acid identity to a known virulence factor from the VFDB database are considered.

.. figure:: {ete_figure_counts}
   :alt: VF tree
   :width: 50%

   Number of identified VFs in each genome.

Details
********

.. raw:: html

    {table_virulence}

Resistance (RGI/CARD)
----------------------

.. raw:: html

    {table_resistance}

"""

    # Explicit encoding so the written report does not depend on the
    # platform's locale default.
    with open(output_file, "w", encoding="utf-8") as fh:
        publish_file(
            source=io.StringIO(report_str),
            destination=fh,
            writer_name="html",
            # An empty stylesheet path stops docutils from looking up its
            # default stylesheet file.
            settings_overrides={"stylesheet_path": ""},
        )