def _pred_err_group_key(params, reward_type):
    """Return a hashable grouping key built from ``params[reward_type + "_pred_err"]``.

    A list value is converted to a tuple so it can be used as a dictionary
    key; any other value is returned unchanged.
    """
    value = params[reward_type + "_pred_err"]
    return tuple(value) if isinstance(value, list) else value


def _add_separate_and_joint(arrsum, params):
    """Add a "Separate" and a "Joint" subsubsection listing the matching experiments."""
    sub_groups = group_parameters_by(
        params, lambda x: "separate" if x["separate"] else "joint")
    for title, key in (("Separate", "separate"), ("Joint", "joint")):
        with arrsum.create(tex.Subsubsection(title)):
            for p in sub_groups[key]:
                arrsum.add_experiment(p)


def continuous_stagewise_separate():
    """Build the "Continuous vs Stage-wise vs Separate" summary and write it as a PDF.

    The document starts with an "Overview" section holding the STD table.
    Then, for every reward type, the parameter sets are grouped by their
    ``<reward_type>_pred_err`` value; each group gets its own section that is
    split into a "Stages" subsection (``continuous`` is None) and a
    "Continuous" subsection, each further divided into "Separate" and "Joint"
    experiments and followed by a page break.

    Relies on the module-level names ``data``, ``parameters``,
    ``parameters_by_reward_type`` and ``path_to_array``.
    """
    arrsum = ArraySummary("Continuous vs Stage-wise vs Separate")

    with arrsum.create(tex.Section("Overview")):
        arrsum.add_tabular(
            get_tab_data(data, parameters, by_exp_type, by_reward, get_std,
                         name="STD"))

    for reward_type in ["minimize", "maximize", "target", "range"]:
        grouped_by_reward_params = group_parameters_by(
            parameters_by_reward_type[reward_type],
            lambda x: _pred_err_group_key(x, reward_type))

        for k in grouped_by_reward_params:
            with arrsum.create(tex.Section(reward_type + str(k))):
                groups = group_parameters_by(
                    grouped_by_reward_params[k],
                    lambda x: "stages" if x["continuous"] is None else "continuous")

                # Both subsections share the same Separate/Joint layout.
                for title, key in (("Stages", "stages"),
                                   ("Continuous", "continuous")):
                    with arrsum.create(tex.Subsection(title)):
                        _add_separate_and_joint(arrsum, groups[key])
                        arrsum.clearpage()

    arrsum.generate_pdf(filepath=path_to_array + "new_impl", clean_tex=False)
def finalize(self):
    """Assemble the correlator-fit report and compile it to a PDF.

    One section is written per operator set and one subsection per operator.
    Every fit contributes a subsubsection produced by ``self._add_fits`` and,
    when at least one of its tmin plot files exists on disk, a second
    subsubsection produced by ``self._add_tmins`` (ordered by ``tmax``).
    The document is compiled to ``<results_dir>/<task_name>``.
    """
    report = util.create_doc(
        f"Correlator Fits: {self.ensemble_name} - {self.task_name}")

    for op_set, fits_by_operator in self.operator_fits.items():
        with report.create(pylatex.Section(str(op_set))):
            for op, fit_map in fits_by_operator.items():
                with report.create(pylatex.Subsection(str(op))):
                    for fit, infos in fit_map.items():
                        # normal fits
                        log_path = self.logfile(op_set, op, fit.name)
                        fit_log = sigmond_info.sigmond_log.FitLog(log_path)
                        if self.fit_plots:
                            fit_plots_path = self.fit_plotdir(
                                op_set, op, fit.name)
                            util.dirGrace2pdf(fit_plots_path)

                        fit_title = f"{fit.name} - Model: {fit.model.short_name}"
                        with report.create(pylatex.Subsubsection(fit_title)):
                            self._add_fits(report, fit_log, fit.name, op_set,
                                           fit.ratio, fit.model.has_gap,
                                           fit.model.has_const)

                        # tmin fits
                        if self.tmin_plots:
                            tmin_plots_path = self.tmin_fit_plotdir(
                                op_set, op, fit.name)
                            util.dirGrace2pdf(tmin_plots_path)

                        # Keep only the tmin fits whose PDF plot is on disk.
                        plotted = [
                            info for info in infos['tmin']
                            if os.path.isfile(
                                self.tmin_fit_plotfile(
                                    op_set, fit.name, info,
                                    extension=util.PlotExtension.pdf))
                        ]
                        if plotted:
                            plotted.sort(key=lambda info: info.tmax)
                            tmin_title = f"$t_{{\\rm min}}$ plots - {fit.name} - Model: {fit.model.short_name}"
                            with report.create(
                                    pylatex.Subsubsection(
                                        pylatex.NoEscape(tmin_title))):
                                self._add_tmins(report, plotted, fit.name,
                                                op_set, fit.ratio)

    out_dir = self.results_dir
    os.makedirs(out_dir, exist_ok=True)
    util.compile_pdf(report, os.path.join(out_dir, self.task_name),
                     self.latex_compiler)
def _add_string_notes(document: pylatex.Document, texts: dict, images: dict) -> None:
    """Append the notes for string instruments to ``document``.

    Adds, in order: an unnumbered subsection titled from
    ``texts["strings"]``, an introductory subsubsection, the microtonal
    notation subsubsection (explanation image plus the scale images for
    violin, viola and cello), and a final subsubsection with the
    ornamentation and glissando images. Nothing is returned; ``document``
    is modified in place.
    """

    def heading(section_cls, title):
        # Every heading in this part is unlabelled and unnumbered.
        return section_cls(title=title, label=False, numbering=False)

    add = document.append

    strings = texts["strings"]
    add(heading(pylatex.Subsection, strings["title"]))
    add(heading(pylatex.Subsubsection, strings["subtitle0"]))
    add(strings["text0"])

    microtonal = texts["microtonal_notation"]
    add(heading(pylatex.Subsubsection, microtonal["title"]))
    add(microtonal["text0"])
    add(_make_img(images["twelfth_tone_explanation"]))
    add(microtonal["text1"])
    for instrument in ("violin", "viola", "cello"):
        scale_key = "scale_{}".format(instrument)
        harmonics_key = "scale_{}_artificial_harmonics".format(instrument)
        add(_make_img(images[scale_key]))
        add(_make_img(images[harmonics_key]))
    add(pylatex.Command("hspace", arguments=[pylatex.NoEscape("5mm")]))
    add(microtonal["text2"])

    add(heading(pylatex.Subsubsection, strings["subtitle1"]))
    add(_make_img(images["ornamentation"], width=0.25))
    add(strings["text1"])
    add(_make_img(images["glissando"], width=0.28))
    add(strings["text2"])
def addPlotsToPDF(self, doc, data_files, operators, name):
    """Add diagonal (and optionally off-diagonal) correlator plots to ``doc``.

    :param doc: pylatex document the subsections are appended to
    :param data_files: passed to ``util.get_obs_handlers`` to build the
        observable handler
    :param operators: operators whose correlators are considered
    :param name: identifier used to locate the correlator/energy plot
        directories and forwarded to ``util.add_correlator``

    The "Off-Diagonal Correlators" subsection is only written when
    ``self.off_diagonal`` is set and at least one off-diagonal correlator is
    known to ``self.data_handler``.
    """
    obs_handler, _ = util.get_obs_handlers(data_files, self.bins_info,
                                           self.sampling_info)

    # Convert the plots of both directories to PDF before referencing them.
    for plots_dir in (self.correlator_plotdir(name), self.energy_plotdir(name)):
        util.dirGrace2pdf(plots_dir)

    has_correlator = self.data_handler.hasCorrelator

    # Lazily built so each candidate is checked right after it is created.
    candidate_pairs = (
        sigmond.CorrelatorInfo(sink.operator_info, source.operator_info)
        for source in operators
        for sink in operators
        if not source == sink
    )
    off_diagonal = [pair for pair in candidate_pairs if has_correlator(pair)]

    with doc.create(pylatex.Subsection("Diagonal Correlators")):
        for op in operators:
            diagonal = sigmond.CorrelatorInfo(op.operator_info,
                                              op.operator_info)
            if not has_correlator(diagonal):
                continue
            with doc.create(pylatex.Subsubsection(str(op))):
                util.add_correlator(doc, self, diagonal, name, obs_handler)

    if self.off_diagonal and off_diagonal:
        with doc.create(pylatex.Subsection("Off-Diagonal Correlators")):
            for pair in off_diagonal:
                with doc.create(pylatex.Subsubsection(pair.corr_str())):
                    util.add_correlator(doc, self, pair, name, obs_handler)
def finalize(self):
    """Assemble the rotated-correlator report and compile it to a PDF.

    For every operator basis whose rotation log does not report a failure,
    a section is written containing:

    * "Rotation Info": a table of pivot parameters and condition numbers,
      the metric null-space message, the per-operator diagonal correlator
      errors and the table of diagonal deviations from zero;
    * "Correlators/Effective Energies": one subsubsection per rotated
      operator, filled by ``util.add_correlator``.

    Bases whose rotation failed are skipped with a logged warning. The
    document is compiled to ``<results_dir>/<task_name>``.
    """
    doc = util.create_doc(
        f"Rotated Correlators and Effective Energies: {self.task_name} - {self.ensemble_name}")

    for operator_basis in self.operator_bases:
        logfile = self.logfile(repr(operator_basis))
        rotation_log = sigmond_info.sigmond_log.RotationLog(logfile)
        if rotation_log.failed:
            logging.warning(f"Rotation {operator_basis.name} failed")
            continue

        corr_plotsdir = self.correlator_plotdir(operator_basis)
        energy_plotsdir = self.energy_plotdir(operator_basis)
        util.dirGrace2pdf(corr_plotsdir)
        util.dirGrace2pdf(energy_plotsdir)

        data_files = self.data_handler.getRotatedDataFiles(operator_basis)
        obs_handler, _ = util.get_obs_handlers(data_files, self.bins_info,
                                               self.sampling_info)

        with doc.create(pylatex.Section(
                f"{operator_basis.channel!s} - {operator_basis.name}")):
            with doc.create(pylatex.Subsection("Rotation Info")):
                # Pivot parameters and condition numbers.
                with doc.create(pylatex.Center()) as centered:
                    with centered.create(
                            pylatex.LongTabu(
                                "X[c]|X[c]|X[c]|X[c]|X[c]|X[3,c]|X[3,c]|X[3,c]|X[3,c]|X[3,c]",
                                to=r"\linewidth")) as param_table:
                        header_row = [
                            pylatex.NoEscape(r"$N_{op}$"),
                            pylatex.NoEscape(r"$N_{\text{d}}$"),
                            pylatex.NoEscape(r"$\tau_N$"),
                            pylatex.NoEscape(r"$\tau_0$"),
                            pylatex.NoEscape(r"$\tau_D$"),
                            pylatex.NoEscape(r"$\xi_{cn}$ (max)"),
                            pylatex.NoEscape(r"$\xi_{cn}^C$ (input)"),
                            pylatex.NoEscape(r"$\xi_{cn}^C$ (retain)"),
                            pylatex.NoEscape(r"$\xi_{cn}^G$ (input)"),
                            pylatex.NoEscape(r"$\xi_{cn}^G$ (retain)"),
                        ]
                        param_table.add_row(header_row,
                                            mapper=[pylatex.utils.bold])
                        param_table.add_hline()
                        param_table.end_table_header()
                        value_row = [
                            operator_basis.num_operators,
                            operator_basis.num_operators - rotation_log.number_levels,
                            operator_basis.pivot_info.norm_time,
                            operator_basis.pivot_info.metric_time,
                            operator_basis.pivot_info.diagonalize_time,
                            operator_basis.pivot_info.max_condition_number,
                            rotation_log.metric_condition(False),
                            rotation_log.metric_condition(True),
                            rotation_log.matrix_condition(False),
                            rotation_log.matrix_condition(True),
                        ]
                        param_table.add_row(value_row)

                doc.append(
                    pylatex.NoEscape(
                        r"\textbf{Metric Null Space Check:} "
                        + rotation_log.metric_null_space_message))

                with doc.create(pylatex.Subsubsection("Input Operators")):
                    with doc.create(pylatex.Center()) as centered:
                        with centered.create(
                                pylatex.LongTabu("X[2,c] X[c] X[c]",
                                                 row_height=1.5)) as op_table:
                            header_row = [
                                "Operator",
                                pylatex.NoEscape(r"$\delta C(\tau_0)$"),
                                pylatex.NoEscape(r"$\delta C(\tau_D)$"),
                            ]
                            op_table.add_row(header_row,
                                             mapper=[pylatex.utils.bold])
                            op_table.add_hline()
                            op_table.end_table_header()
                            for op, errors in rotation_log.diagonal_correlator_errors.items():
                                row = [
                                    op,
                                    errors.metric,
                                    errors.matrix,
                                ]
                                # The original had a stray, uncalled
                                # `op_table.add_row` expression before this
                                # call; it did nothing and has been dropped.
                                op_table.add_row(row)

                with doc.create(
                        pylatex.Subsubsection("Diagonal Deviations From Zero")):
                    with doc.create(pylatex.Center()) as centered:
                        with centered.create(
                                pylatex.LongTabu(
                                    "X[c] X[4,c] X[3,c] X[3,c] X[3,c] X[3,c] X[2,c]"
                                )) as deviation_table:
                            header_row = [
                                "time",
                                pylatex.NoEscape(r"$\delta 0_{max}$"),
                                pylatex.NoEscape(r"$\% > 1 \sigma$"),
                                pylatex.NoEscape(r"$\% > 2 \sigma$"),
                                pylatex.NoEscape(r"$\% > 3 \sigma$"),
                                pylatex.NoEscape(r"$\% > 4 \sigma$"),
                                "Status",
                            ]
                            deviation_table.add_row(
                                header_row, mapper=[pylatex.utils.bold])
                            deviation_table.add_hline()
                            deviation_table.end_table_header()
                            for time, deviation in rotation_log.deviations_from_zero.items():
                                row = [
                                    time,
                                    deviation.max,
                                    deviation.one,
                                    deviation.two,
                                    deviation.three,
                                    deviation.four,
                                    deviation.status,
                                ]
                                deviation_table.add_row(row)

            doc.append(pylatex.NoEscape(r"\newpage"))

            operators = self.data_handler.getRotatedOperators(operator_basis)
            with doc.create(
                    pylatex.Subsection("Correlators/Effective Energies")):
                for operator in operators:
                    with doc.create(pylatex.Subsubsection(str(operator))):
                        corr = sigmond.CorrelatorInfo(operator.operator_info,
                                                      operator.operator_info)
                        util.add_correlator(doc, self, corr, operator_basis,
                                            obs_handler)

    results_dir = self.results_dir
    os.makedirs(results_dir, exist_ok=True)
    filename = os.path.join(results_dir, self.task_name)
    util.compile_pdf(doc, filename, self.latex_compiler)
def _roga_value(df, sample_id, column):
    """Return ``column`` from the first row of ``df`` whose 'SeqID' equals ``sample_id``."""
    return df.loc[df['SeqID'] == sample_id][column].values[0]


def _roga_mlst(df, sample_id):
    """Return the (MLST, rMLST) results as strings with '-' replaced by 'New'."""
    return tuple(
        str(_roga_value(df, sample_id, column)).replace('-', 'New')
        for column in ('MLST_Result', 'rMLST_Result'))


def _roga_flags(marker_list, markers):
    """Return '+' or '-' for each marker, depending on whether it occurs in ``marker_list``."""
    return tuple('+' if marker in marker_list else '-' for marker in markers)


def _roga_footnoted(name, marks):
    """Return a bold header cell for ``name`` carrying the superscript footnote ``marks``."""
    return bold(
        pl.NoEscape(name + r'{\footnotesize \textsuperscript {' + marks + '}}'))


def _roga_wrap_serovar(serovar):
    """Replace the first space at/after the midpoint of a serovar longer than 12 chars with a LaTeX newline."""
    if len(serovar) <= 12:
        return serovar
    split_at = serovar.find(' ', len(serovar) // 2)
    if split_at == -1:
        # No space in the second half: leave the serovar untouched.
        return serovar
    return pl.NoEscape(serovar[:split_at] + '\\newline ' + serovar[split_at + 1:])


def _roga_text_field(name, multiline, width, height, label):
    """Return the ``TextField`` command for a form box with the given options and label."""
    return pl.Command('TextField',
                      options=[
                          "name=" + name,
                          "multiline=" + multiline,
                          pl.NoEscape("bordercolor=0 0 0"),
                          pl.NoEscape("width=" + width),
                          "height=" + height
                      ],
                      arguments=label)


def _roga_add_form_box(doc, name, multiline, width, height):
    """Append a Form containing a single unlabelled text box to ``doc``."""
    with doc.create(Form()):
        doc.append(pl.Command('noindent'))
        doc.append(_roga_text_field(name, multiline, width, height, ''))


def _roga_confirm_all(validated, organism, idiot_proofing_list):
    """Record every sample that failed validation; return whether no value equals False."""
    for key, value in validated.items():
        if value is False:
            idiot_proofing_list.append(
                'Could not confirm {} as {}'.format(key, organism))
    return False not in validated.values()


def _roga_add_header(doc, report_id, lab, amendment_flag, amended_id):
    """Append report ID, RDIMS box, lab summary and (for amendments) the reason box."""
    doc.append(bold('Report ID: '))
    doc.append(report_id)
    if amendment_flag:
        doc.append(italic(' (This report is an amended version of '))
        doc.append(amended_id)
        doc.append(italic(')'))
    doc.append('\n')
    doc.append(
        _roga_text_field('rdimsnumberbox', 'false', '1.1in', '0.2in',
                         bold('RDIMS ID: ')))
    doc.append(bold('\nReporting laboratory: '))
    doc.append(lab)
    doc.append('\n\n')

    # LAB SUMMARY
    with doc.create(pl.Tabular('lcr', booktabs=True)) as table:
        table.add_row(bold('Laboratory'), bold('Address'), bold('Tel #'))
        table.add_row(lab, lab_info[lab][0], lab_info[lab][1])

    # AMENDMENT FIELD
    if amendment_flag:
        with doc.create(
                pl.Subsubsection('Reason for amendment:', numbering=False)):
            _roga_add_form_box(doc, 'amendmentbox', 'true', '7in', '0.43in')


def _roga_add_summary(doc, genus, source, sample_count, all_uida, all_vt,
                      all_confirmed):
    """Append the 'Identification Summary' subsection for ``genus``."""
    species_by_genus = {
        'Listeria': 'Listeria monocytogenes',
        'Salmonella': 'Salmonella enterica',
        'Vibrio': 'Vibrio parahaemolyticus',
    }
    with doc.create(pl.Subsection('Identification Summary',
                                  numbering=False)) as summary:
        summary.append('Whole-genome sequencing analysis was conducted on '
                       '{} '.format(sample_count))
        summary.append(italic('{} '.format(genus)))
        noun = 'strain' if sample_count == 1 else 'strains'
        summary.append('{} isolated from "{}". '.format(noun, source.lower()))

        if genus == 'Escherichia':
            if all_uida:
                summary.append('The following strains are confirmed as ')
                summary.append(italic('Escherichia coli '))
                summary.append(
                    'based on 16S sequence and the presence of marker gene ')
                summary.append(italic('uidA. '))
            else:
                summary.append(
                    'Some of the following strains could not be confirmed to be ')
                summary.append(italic('Escherichia coli '))
                summary.append('as the ')
                summary.append(italic('uidA '))
                summary.append('marker gene was not detected. ')
            if all_vt:
                summary.append(
                    'All strain(s) are confirmed to be VTEC based on detection of probe sequences '
                    'indicating the presence of verotoxin genes.')
        elif genus in species_by_genus:
            species = species_by_genus[genus]
            if all_confirmed:
                summary.append('The following strains are confirmed to be ')
                summary.append(italic(species + ' '))
                summary.append('based on GeneSeekr analysis: ')
            else:
                summary.append(
                    'Some of the following strains could not be confirmed to be ')
                summary.append(italic(species + '.'))


def _roga_vibrio_row(df, sample_id, lsts_id):
    """Build the GeneSeekr table row for one Vibrio sample."""
    mlst, rmlst = _roga_mlst(df, sample_id)
    marker_list = _roga_value(df, sample_id, 'GeneSeekr_Profile')
    r72h, groel = _roga_flags(marker_list, ('r72h', 'groEL'))
    virulence = ';'.join(
        gene for gene in ('tdh', 'trh') if gene in marker_list) or '-'
    return lsts_id, r72h, groel, virulence, mlst, rmlst


def _roga_escherichia_row(df, sample_id, lsts_id):
    """Build the GeneSeekr table row for one Escherichia sample."""
    serotype = _roga_value(df, sample_id, 'E_coli_Serotype')
    fixed_serotype = remove_bracketed_values(serotype)  # remove % identity
    verotoxin = _roga_value(df, sample_id, 'Vtyper_Profile')
    mlst, rmlst = _roga_mlst(df, sample_id)
    marker_list = _roga_value(df, sample_id, 'GeneSeekr_Profile')
    uida, eae, hlya, aggr = _roga_flags(marker_list,
                                        ('uidA', 'eae', 'hlyA', 'aggR'))
    return (lsts_id, uida, fixed_serotype, verotoxin, hlya, eae, aggr, mlst,
            rmlst)


def _roga_listeria_row(df, sample_id, lsts_id):
    """Build the GeneSeekr table row for one Listeria sample."""
    mlst, rmlst = _roga_mlst(df, sample_id)
    marker_list = _roga_value(df, sample_id, 'GeneSeekr_Profile')
    igs, hlya, inlj = _roga_flags(marker_list, ('IGS', 'hlyA', 'inlJ'))
    return lsts_id, igs, hlya, inlj, mlst, rmlst


def _roga_salmonella_row(df, sample_id, lsts_id):
    """Build the GeneSeekr table row for one Salmonella sample."""
    mlst, rmlst = _roga_mlst(df, sample_id)
    # Long serovars make the table wider than the page, so they are wrapped.
    serovar = _roga_wrap_serovar(_roga_value(df, sample_id, 'SISTR_serovar'))
    sistr_serogroup = _roga_value(df, sample_id, 'SISTR_serogroup')
    sistr_h1 = _roga_value(df, sample_id, 'SISTR_h1').strip(';')
    sistr_h2 = _roga_value(df, sample_id, 'SISTR_h2').strip(';')
    marker_list = _roga_value(df, sample_id, 'GeneSeekr_Profile')
    inva, stn = _roga_flags(marker_list, ('invA', 'stn'))
    return (lsts_id, serovar, sistr_serogroup, sistr_h1, sistr_h2, inva, stn,
            mlst, rmlst)


def _roga_genesippr_layout(genus):
    """Return (column spec, header cells, row builder, captions) for ``genus``, or None."""
    marker_caption = ('a', "+ indicates marker presence : "
                      "- indicates marker was not detected")
    plain = lambda name: bold(pl.NoEscape(name))
    if genus == 'Vibrio':
        header = (bold('ID'), _roga_footnoted('R72H', 'a'),
                  _roga_footnoted('groEL', 'a'), plain(r'Virulence Profile'),
                  plain(r'MLST'), plain(r'rMLST'))
        return '|c|c|c|c|c|c|', header, _roga_vibrio_row, (marker_caption,)
    if genus == 'Escherichia':
        header = (bold('ID'), _roga_footnoted('uidA', 'a'),
                  plain(r'Serotype'), plain(r'Verotoxin(s)'),
                  _roga_footnoted('hlyA', 'a'), _roga_footnoted('eae', 'a'),
                  _roga_footnoted('aggR', 'a'), plain(r'MLST'),
                  plain(r'rMLST'))
        return ('|c|c|c|c|c|c|c|c|c|', header, _roga_escherichia_row,
                (marker_caption,))
    if genus == 'Listeria':
        header = (bold('ID'), _roga_footnoted('IGS', 'a'),
                  _roga_footnoted('hlyA', 'a'), _roga_footnoted('inlJ', 'a'),
                  plain(r'MLST'), plain(r'rMLST'))
        return '|c|c|c|c|c|c|', header, _roga_listeria_row, (marker_caption,)
    if genus == 'Salmonella':
        header = (bold('ID'), _roga_footnoted('Serovar', 'a'),
                  _roga_footnoted('Serogroup', 'a,b'),
                  _roga_footnoted('H1', 'a'), _roga_footnoted('H2', 'a'),
                  _roga_footnoted('invA', 'b'), _roga_footnoted('stn', 'b'),
                  plain(r'MLST'), plain(r'rMLST'))
        captions = (('a', "Predictions conducted using SISTR "
                     "(Salmonella In Silico Typing Resource)"),
                    ('b', marker_caption[1]))
        return ('|c|p{2cm}|c|c|c|c|c|c|c|', header, _roga_salmonella_row,
                captions)
    return None


def _roga_add_genesippr_table(doc, genus, metadata_reports, seq_lsts_dict):
    """Append the genus-specific 'GeneSeekr Analysis' table, if one exists for ``genus``."""
    layout = _roga_genesippr_layout(genus)
    if layout is None:
        return
    column_spec, header, build_row, captions = layout
    with doc.create(pl.Subsection('GeneSeekr Analysis',
                                  numbering=False)) as genesippr_section:
        with doc.create(pl.Tabular(column_spec)) as table:
            table.add_hline()
            table.add_row(header)
            for sample_id, df in metadata_reports.items():
                table.add_hline()
                table.add_row(
                    build_row(df, sample_id, seq_lsts_dict[sample_id]))
            table.add_hline()
        for label, text in captions:
            create_caption(genesippr_section, label, text)


def _roga_add_amr_table(doc, metadata_reports, seq_lsts_dict, amr_samples):
    """Append the 'Antimicrobial Resistance Profiling' table for ``amr_samples``."""
    with doc.create(
            pl.Subsection('Antimicrobial Resistance Profiling',
                          numbering=False)):
        with doc.create(pl.Tabular('|c|c|c|c|')) as table:
            table.add_hline()
            table.add_row((bold('ID'), bold(pl.NoEscape(r'Resistance')),
                           bold(pl.NoEscape(r'Gene')),
                           bold(pl.NoEscape(r'Percent Identity'))))

            # The previous id/resistance decide how far each separator line
            # is drawn; a unique sentinel never equals a real value.
            unset = object()
            previous_id = unset
            previous_resistance = unset

            for sample_id, df in metadata_reports.items():
                if sample_id not in amr_samples:
                    continue
                parsed_profile = extract_report_data.parse_amr_profile(
                    _roga_value(df, sample_id, 'AMR_Profile'))
                if parsed_profile is None:
                    continue
                for value in parsed_profile:
                    resistance = value.resistance
                    res_to_write = resistance
                    lsts_id = seq_lsts_dict[sample_id]
                    if lsts_id != previous_id:
                        # New sample: line across the whole table.
                        table.add_hline()
                        id_to_write = lsts_id
                    elif resistance == previous_resistance:
                        # Same sample and resistance: only separate the gene
                        # and percent identity columns.
                        table.add_hline(start=3, end=4)
                        id_to_write = ''
                        res_to_write = ''
                    else:
                        # Same sample, new resistance: everything but the id.
                        table.add_hline(start=2, end=4)
                        id_to_write = ''
                    previous_id = lsts_id
                    previous_resistance = resistance
                    table.add_row((id_to_write, res_to_write, value.gene,
                                   value.percent_id))
            # Close off table
            table.add_hline()


def _roga_add_quality_table(doc, metadata_reports, seq_lsts_dict, gdcs_dict,
                            idiot_proofing_list):
    """Append the 'Sequence Quality Metrics' table and record GDCS failures."""
    with doc.create(pl.Subsection('Sequence Quality Metrics',
                                  numbering=False)):
        with doc.create(pl.Tabular('|c|c|c|c|c|')) as table:
            table.add_hline()
            table.add_row((bold('ID'), bold(pl.NoEscape(r'Total Length')),
                           bold(pl.NoEscape(r'Coverage')),
                           bold(pl.NoEscape(r'GDCS')),
                           bold(pl.NoEscape(r'Pass/Fail'))))
            for sample_id, df in metadata_reports.items():
                table.add_hline()
                lsts_id = seq_lsts_dict[sample_id]
                total_length = _roga_value(df, sample_id, 'TotalLength')
                depth = _roga_value(df, sample_id, 'AverageCoverageDepth')
                # Round the coverage to a whole number and restore the 'X'.
                coverage = format(float(str(depth).replace('X', '')),
                                  '.0f') + 'X'
                matches = gdcs_dict[sample_id][0]
                passfail = gdcs_dict[sample_id][1]
                if passfail == '+':
                    passfail = 'Pass'
                elif passfail == '-':
                    passfail = 'Fail'
                    idiot_proofing_list.append(
                        '{} failed GDCS validation'.format(sample_id))
                table.add_row(
                    (lsts_id, total_length, coverage, matches, passfail))
            table.add_hline()


def _roga_add_pipeline_table(doc, metadata_reports, seq_lsts_dict):
    """Append the 'Pipeline Metadata' table (the database version mirrors the pipeline version)."""
    with doc.create(pl.Subsection('Pipeline Metadata', numbering=False)):
        with doc.create(pl.Tabular('|c|c|c|c|')) as table:
            table.add_hline()
            table.add_row((bold('ID'), bold('Seq ID'),
                           bold('Pipeline Version'),
                           bold('Database Version')))
            for sample_id, df in metadata_reports.items():
                table.add_hline()
                pipeline_version = _roga_value(df, sample_id,
                                               'PipelineVersion')
                table.add_row((seq_lsts_dict[sample_id], sample_id,
                               pipeline_version, pipeline_version))
            table.add_hline()


def generate_roga(seq_lsts_dict, genus, lab, source, work_dir, amendment_flag,
                  amended_id):
    """
    Generates PDF
    :param seq_lsts_dict: Dict of SeqIDs;LSTSIDs
    :param genus: Expected Genus for samples (Salmonella, Listeria, Escherichia, or Vibrio)
    :param lab: ID for lab report is being generated for
    :param source: string input for source that strains were derived from, i.e. 'ground beef'
    :param work_dir: bio_request directory
    :param amendment_flag: determined if the report is an amendment type or not (True/False)
    :param amended_id: ID of the original report that the new report is amending
    :return: tuple of (path of the PDF file, list of problems found while
        building the report). The path is returned even when LaTeX
        compilation raised an error.
    """
    # RETRIEVE DATAFRAMES FOR EACH SEQID
    seq_list = list(seq_lsts_dict.keys())
    metadata_reports = extract_report_data.get_combined_metadata(seq_list)
    gdcs_reports = extract_report_data.get_gdcs(seq_list)
    gdcs_dict = extract_report_data.generate_gdcs_dict(gdcs_reports)

    # Things that should stop a report from being sent out are collected here
    # and returned to the caller alongside the PDF path.
    idiot_proofing_list = list()

    # DATE SETUP
    # A single timestamp keeps date and year consistent with each other.
    now = datetime.today()
    date = now.strftime('%Y-%m-%d')
    year = now.strftime('%Y')
    # Follow our fiscal year - anything before April is actually previous year.
    # (As before, year is an int in that case and a str otherwise.)
    if now.month < 4:
        year = int(year) - 1

    # PAGE SETUP
    geometry_options = {
        "tmargin": "2cm",
        "lmargin": "1cm",
        "rmargin": "1cm",
        "headsep": "1cm"
    }
    doc = pl.Document(page_numbers=False, geometry_options=geometry_options)
    header = produce_header_footer()
    doc.preamble.append(header)
    doc.change_document_style("header")

    # DATABASE HANDLING
    report_id = update_db(date=date,
                          year=year,
                          genus=genus,
                          lab=lab,
                          source=source,
                          amendment_flag=amendment_flag,
                          amended_id=amended_id)

    # MARKER VARIABLES SETUP
    all_uida = False
    all_vt = False
    some_vt = False
    all_confirmed = False  # Listeria / Salmonella / Vibrio confirmation
    vt_sample_list = []

    # SECOND VALIDATION SCREEN
    if genus == 'Escherichia':
        validated_ecoli_dict = extract_report_data.validate_ecoli(
            seq_list, metadata_reports)
        vt_list = []
        uida_list = []
        for key, value in validated_ecoli_dict.items():
            ecoli_uida_present = value[0]
            ecoli_vt_present = value[1]
            uida_list.append(ecoli_uida_present)
            vt_list.append(ecoli_vt_present)

            # For the AMR table so only vt+ samples are shown
            if ecoli_vt_present is True:
                vt_sample_list.append(key)

            if not ecoli_uida_present:
                print('WARNING: uidA not present for {}. Cannot confirm E. coli.'
                      .format(key))
                idiot_proofing_list.append(
                    'uidA not present in {}. Cannot confirm E. coli'.format(
                        key))
            if not ecoli_vt_present:
                print('WARNING: vt probe sequences not detected for {}. '
                      'Cannot confirm strain is verotoxigenic.'.format(key))
                idiot_proofing_list.append(
                    'VTX not present in {}. Cannot confirm strain is verotoxigenic'
                    .format(key))
        all_uida = False not in uida_list
        all_vt = False not in vt_list
        some_vt = True in vt_list
    elif genus == 'Listeria':
        all_confirmed = _roga_confirm_all(
            extract_report_data.validate_listeria(seq_list, metadata_reports),
            'L. monocytogenes', idiot_proofing_list)
    elif genus == 'Salmonella':
        all_confirmed = _roga_confirm_all(
            extract_report_data.validate_salmonella(seq_list,
                                                    metadata_reports),
            'S. enterica', idiot_proofing_list)
    elif genus == 'Vibrio':
        all_confirmed = _roga_confirm_all(
            extract_report_data.validate_vibrio(seq_list, metadata_reports),
            'Vibrio', idiot_proofing_list)

    # MAIN DOCUMENT BODY
    with doc.create(
            pl.Section('Report of Genomic Analysis: ' + genus,
                       numbering=False)):
        # REPORT ID AND AMENDMENT CHECKING
        _roga_add_header(doc, report_id, lab, amendment_flag, amended_id)

        # TEXT SUMMARY
        _roga_add_summary(doc, genus, source, len(metadata_reports), all_uida,
                          all_vt, all_confirmed)

        # GENUS-SPECIFIC GENESEEKR TABLE
        # The table is chosen from the `genus` argument only. The per-sample
        # 'Genus' column is not assigned to `genus`; doing so would replace
        # the caller's value for every later genus check and for the output
        # file name.
        _roga_add_genesippr_table(doc, genus, metadata_reports, seq_lsts_dict)

        # AMR TABLE (VTEC, Salmonella and Vibrio only)
        # Pre-check which samples have an AMR profile so the table is only
        # created when there is something to show.
        amr_samples = []
        for sample_id, df in metadata_reports.items():
            profile = _roga_value(df, sample_id, 'AMR_Profile')
            if extract_report_data.parse_amr_profile(profile) is None:
                continue
            if genus in ('Salmonella', 'Vibrio'):
                amr_samples.append(sample_id)
            elif genus == 'Escherichia' and sample_id in vt_sample_list:
                # vt_sample_list contains all vt+ sample IDs
                amr_samples.append(sample_id)

        if amr_samples and (genus in ('Salmonella', 'Vibrio') or some_vt):
            _roga_add_amr_table(doc, metadata_reports, seq_lsts_dict,
                                amr_samples)

        # SEQUENCE TABLE
        _roga_add_quality_table(doc, metadata_reports, seq_lsts_dict,
                                gdcs_dict, idiot_proofing_list)

        # PIPELINE METADATA TABLE
        _roga_add_pipeline_table(doc, metadata_reports, seq_lsts_dict)

        # 'VERIFIED BY' FIELD
        with doc.create(pl.Subsubsection('Verified by:', numbering=False)):
            _roga_add_form_box(doc, 'verifiedbybox', 'false', '2.5in',
                               '0.3in')

    # OUTPUT PDF FILE
    pdf_file = os.path.join(work_dir,
                            '{}_{}_{}'.format(report_id, genus, date))
    try:
        doc.generate_pdf(pdf_file, clean_tex=False)
    except Exception as exc:
        # Best effort, as before: a compilation error does not abort the
        # report, but it is no longer swallowed silently.
        print('WARNING: generating {}.pdf raised an error: {}'.format(
            pdf_file, exc))
    pdf_file += '.pdf'
    return pdf_file, idiot_proofing_list
def generate_roga(seq_list, genus, lab, source, work_dir, amendment_flag, amended_id):
    """
    Generates a Report of Genomic Analysis (ROGA) PDF for the requested samples.

    The document is assembled with pylatex: a report header (with an amendment
    notice and a "Reason for amendment" form field when ``amendment_flag`` is
    set), an identification summary, a genus-specific GeneSeekr table, the
    sequence quality metrics, the pipeline metadata and a "Verified by" form
    field.

    :param seq_list: List of OLC Seq IDs
    :param genus: Expected Genus for samples (Salmonella, Listeria, or Escherichia)
    :param lab: ID for lab report is being generated for
    :param source: string input for source that strains were derived from, i.e. 'ground beef'
    :param work_dir: bio_request directory
    :param amendment_flag: determined if the report is an amendment type or not (True/False)
    :param amended_id: ID of the original report that the new report is amending
    :return: path of the PDF file inside ``work_dir``; the path is returned even
        when the LaTeX compilation reported an error
    """
    # RETRIEVE DATAFRAMES FOR EACH SEQID
    metadata_reports = extract_report_data.get_combined_metadata(seq_list)
    gdcs_reports = extract_report_data.get_gdcs(seq_list)
    gdcs_dict = extract_report_data.generate_gdcs_dict(gdcs_reports)

    # DATE SETUP
    # Read the clock once so that date and year can never disagree.
    today = datetime.today()
    date = today.strftime('%Y-%m-%d')
    year = today.strftime('%Y')

    # PAGE SETUP
    geometry_options = {
        "tmargin": "2cm",
        "lmargin": "1cm",
        "rmargin": "1cm",
        "headsep": "1cm",
    }
    doc = pl.Document(page_numbers=False, geometry_options=geometry_options)
    header = produce_header_footer()
    doc.preamble.append(header)
    doc.change_document_style("header")

    # DATABASE HANDLING
    report_id = update_db(date=date, year=year, genus=genus, lab=lab, source=source,
                          amendment_flag=amendment_flag, amended_id=amended_id)

    # MARKER VARIABLES SETUP
    all_uida = False
    all_vt = False
    all_mono = False
    all_enterica = False

    # SECOND VALIDATION SCREEN
    if genus == 'Escherichia':
        validated_ecoli_dict = extract_report_data.validate_ecoli(seq_list, metadata_reports)
        uida_list = []
        vt_list = []
        for key, value in validated_ecoli_dict.items():
            ecoli_uida_present = value[0]
            ecoli_vt_present = value[1]
            uida_list.append(ecoli_uida_present)
            vt_list.append(ecoli_vt_present)
            if not ecoli_uida_present:
                print('WARNING: uidA not present for {}. Cannot confirm E. coli.'.format(key))
            if not ecoli_vt_present:
                print('WARNING: vt probe sequences not detected for {}. '
                      'Cannot confirm strain is verotoxigenic.'.format(key))
        all_uida = False not in uida_list
        all_vt = False not in vt_list
    elif genus == 'Listeria':
        validated_listeria_dict = extract_report_data.validate_listeria(seq_list, metadata_reports)
        all_mono = False not in list(validated_listeria_dict.values())
    elif genus == 'Salmonella':
        validated_salmonella_dict = extract_report_data.validate_salmonella(seq_list, metadata_reports)
        all_enterica = False not in list(validated_salmonella_dict.values())

    # MAIN DOCUMENT BODY
    with doc.create(pl.Section('Report of Genomic Analysis: ' + genus, numbering=False)):

        # REPORT ID AND AMENDMENT CHECKING
        doc.append(bold('Report ID: '))
        doc.append(report_id)
        if amendment_flag:
            doc.append(italic(' (This report is an amended version of '))
            doc.append(amended_id)
            doc.append(italic(')'))
        doc.append(bold('\nReporting laboratory: '))
        doc.append(lab)
        doc.append('\n\n')

        # LAB SUMMARY
        with doc.create(pl.Tabular('lcr', booktabs=True)) as table:
            table.add_row(bold('Laboratory'), bold('Address'), bold('Tel #'))
            table.add_row(lab, lab_info[lab][0], lab_info[lab][1])

        # AMENDMENT FIELD
        if amendment_flag:
            with doc.create(pl.Subsubsection('Reason for amendment:', numbering=False)):
                with doc.create(Form()):
                    doc.append(pl.Command('noindent'))
                    doc.append(
                        pl.Command('TextField',
                                   options=[
                                       "name=amendmentbox",
                                       "multiline=true",
                                       pl.NoEscape("bordercolor=0 0 0"),
                                       pl.NoEscape("width=7in"),
                                       "height=0.43in",
                                   ],
                                   arguments=''))

        # TEXT SUMMARY
        with doc.create(pl.Subsection('Identification Summary', numbering=False)) as summary:
            summary.append('Whole-genome sequencing analysis was conducted on {} '.format(
                len(metadata_reports)))
            summary.append(italic('{} '.format(genus)))
            if len(metadata_reports) == 1:
                summary.append('strain isolated from "{}". '.format(source.lower()))
            else:
                summary.append('strains isolated from "{}". '.format(source.lower()))

            if genus == 'Escherichia':
                if all_uida:
                    summary.append('The following strains are confirmed as ')
                    summary.append(italic('Escherichia coli '))
                    summary.append('based on 16S sequence and the presence of marker gene ')
                    summary.append(italic('uidA. '))
                else:
                    summary.append('Some of the following strains could not be confirmed to be ')
                    summary.append(italic('Escherichia coli '))
                    summary.append('as the ')
                    summary.append(italic('uidA '))
                    summary.append('marker gene was not detected. ')
                if all_vt:
                    summary.append(
                        'All strain(s) are confirmed to be VTEC based on detection of probe sequences '
                        'indicating the presence of verotoxin genes.')
            elif genus == 'Listeria':
                if all_mono:
                    summary.append('The following strains are confirmed to be ')
                    summary.append(italic('Listeria monocytogenes '))
                    summary.append('based on GeneSeekr analysis: ')
                else:
                    summary.append('Some of the following strains could not be confirmed to be ')
                    summary.append(italic('Listeria monocytogenes.'))
            elif genus == 'Salmonella':
                if all_enterica:
                    summary.append('The following strains are confirmed to be ')
                    summary.append(italic('Salmonella enterica '))
                    summary.append('based on GeneSeekr analysis: ')
                else:
                    summary.append('Some of the following strains could not be confirmed to be ')
                    summary.append(italic('Salmonella enterica.'))

        # NOTE: the table loops below used to rebind ``genus`` to each sample's
        # 16S genus. That value was never written to the table, but it leaked
        # into the later genus checks and into the output file name, so the
        # assignment has been removed and ``genus`` stays the requested genus.

        # ESCHERICHIA TABLE
        if genus == 'Escherichia':
            genesippr_table_columns = (
                bold('ID'),
                bold(pl.NoEscape(r'uidA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'Serotype')),
                bold(pl.NoEscape(r'Verotoxin Profile')),
                bold(pl.NoEscape(r'eae{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )
            with doc.create(pl.Subsection('GeneSeekr Analysis', numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular(table_spec='|c|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()
                        # Select the sample's record once and read every column from it
                        row = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = row['SampleName'].values[0]

                        # Serotype, with the % identity removed
                        serotype = row['E_coli_Serotype'].values[0]
                        fixed_serotype = remove_bracketed_values(serotype)

                        # Verotoxin
                        verotoxin = row['Vtyper_Profile'].values[0]

                        # MLST/rMLST
                        mlst = str(row['MLST_Result'].values[0]).replace('-', 'New')
                        rmlst = str(row['rMLST_Result'].values[0]).replace('-', 'New')

                        # Markers
                        marker_list = row['GeneSeekr_Profile'].values[0]
                        uida = '+' if 'uidA' in marker_list else '-'
                        eae = '+' if 'eae' in marker_list else '-'

                        table.add_row((lsts_id, uida, fixed_serotype, verotoxin, eae, mlst, rmlst))
                    table.add_hline()

                create_caption(genesippr_section, 'a',
                               "+ indicates marker presence : "
                               "- indicates marker was not detected")

        # LISTERIA TABLE
        elif genus == 'Listeria':
            genesippr_table_columns = (
                bold('ID'),
                bold(pl.NoEscape(r'IGS{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'hlyA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'inlJ{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )
            with doc.create(pl.Subsection('GeneSeekr Analysis', numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()
                        row = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = row['SampleName'].values[0]

                        # MLST/rMLST
                        mlst = str(row['MLST_Result'].values[0]).replace('-', 'New')
                        rmlst = str(row['rMLST_Result'].values[0]).replace('-', 'New')

                        # Markers
                        marker_list = row['GeneSeekr_Profile'].values[0]
                        igs = '+' if 'IGS' in marker_list else '-'
                        hlya = '+' if 'hlyA' in marker_list else '-'
                        inlj = '+' if 'inlJ' in marker_list else '-'

                        table.add_row((lsts_id, igs, hlya, inlj, mlst, rmlst))
                    table.add_hline()

                create_caption(genesippr_section, 'a',
                               "+ indicates marker presence : "
                               "- indicates marker was not detected")

        # SALMONELLA TABLE
        elif genus == 'Salmonella':
            genesippr_table_columns = (
                bold('ID'),
                bold(pl.NoEscape(r'Serovar{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'Serogroup{\footnotesize \textsuperscript {a,b}}')),
                bold(pl.NoEscape(r'H1{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'H2{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'invA{\footnotesize \textsuperscript {b}}')),
                bold(pl.NoEscape(r'stn{\footnotesize \textsuperscript {b}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )
            with doc.create(pl.Subsection('GeneSeekr Analysis', numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()
                        row = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = row['SampleName'].values[0]

                        # MLST/rMLST
                        mlst = str(row['MLST_Result'].values[0]).replace('-', 'New')
                        rmlst = str(row['rMLST_Result'].values[0]).replace('-', 'New')

                        # Serovar
                        serovar = row['SISTR_serovar'].values[0]

                        # SISTR Serogroup, H1, H2
                        sistr_serogroup = row['SISTR_serogroup'].values[0]
                        sistr_h1 = row['SISTR_h1'].values[0].strip(';')
                        sistr_h2 = row['SISTR_h2'].values[0].strip(';')

                        # Markers
                        marker_list = row['GeneSeekr_Profile'].values[0]
                        inva = '+' if 'invA' in marker_list else '-'
                        stn = '+' if 'stn' in marker_list else '-'

                        table.add_row((lsts_id, serovar, sistr_serogroup, sistr_h1, sistr_h2,
                                       inva, stn, mlst, rmlst))
                    table.add_hline()

                create_caption(genesippr_section, 'a',
                               "Predictions conducted using SISTR "
                               "(Salmonella In Silico Typing Resource)")
                create_caption(genesippr_section, 'b',
                               "+ indicates marker presence : "
                               "- indicates marker was not detected")

        # SEQUENCE TABLE
        sequence_quality_columns = (
            bold('ID'),
            bold(pl.NoEscape(r'Total Length')),
            bold(pl.NoEscape(r'Coverage')),
            bold(pl.NoEscape(r'GDCS')),
            bold(pl.NoEscape(r'Pass/Fail')),
        )
        with doc.create(pl.Subsection('Sequence Quality Metrics', numbering=False)):
            with doc.create(pl.Tabular('|c|c|c|c|c|')) as table:
                # Header
                table.add_hline()
                table.add_row(sequence_quality_columns)

                # Rows
                for sample_id, df in metadata_reports.items():
                    table.add_hline()
                    row = df.loc[df['SeqID'] == sample_id]

                    # Grab values
                    lsts_id = row['SampleName'].values[0]
                    total_length = row['TotalLength'].values[0]
                    average_coverage_depth = row['AverageCoverageDepth'].values[0]

                    # Fix coverage: drop any 'X' suffix, round to whole depth, re-append 'X'
                    average_coverage_depth = format(
                        float(str(average_coverage_depth).replace('X', '')), '.0f')
                    average_coverage_depth = str(average_coverage_depth) + 'X'

                    # Matches
                    matches = gdcs_dict[sample_id][0]
                    passfail = gdcs_dict[sample_id][1]
                    if passfail == '+':
                        passfail = 'Pass'
                    elif passfail == '-':
                        passfail = 'Fail'

                    # Add row
                    table.add_row((lsts_id, total_length, average_coverage_depth, matches, passfail))
                table.add_hline()

        # PIPELINE METADATA TABLE
        pipeline_metadata_columns = (bold('ID'),
                                     bold('Seq ID'),
                                     bold('Pipeline Version'),
                                     bold('Database Version'))
        with doc.create(pl.Subsection('Pipeline Metadata', numbering=False)):
            with doc.create(pl.Tabular('|c|c|c|c|')) as table:
                # Header
                table.add_hline()
                table.add_row(pipeline_metadata_columns)

                # Rows
                for sample_id, df in metadata_reports.items():
                    table.add_hline()
                    row = df.loc[df['SeqID'] == sample_id]

                    # ID
                    lsts_id = row['SampleName'].values[0]

                    # Pipeline version (also reported as the database version)
                    pipeline_version = row['PipelineVersion'].values[0]
                    database_version = pipeline_version

                    # Add row
                    table.add_row((lsts_id, sample_id, pipeline_version, database_version))
                table.add_hline()

        # 'VERIFIED BY' FIELD
        with doc.create(pl.Subsubsection('Verified by:', numbering=False)):
            with doc.create(Form()):
                doc.append(pl.Command('noindent'))
                doc.append(
                    pl.Command('TextField',
                               options=[
                                   "name=verifiedbybox",
                                   "multiline=false",
                                   pl.NoEscape("bordercolor=0 0 0"),
                                   pl.NoEscape("width=2.5in"),
                                   "height=0.3in",
                               ],
                               arguments=''))

    # OUTPUT PDF FILE
    pdf_file = os.path.join(work_dir, '{}_{}_{}'.format(report_id, genus, date))
    try:
        doc.generate_pdf(pdf_file, clean_tex=False)
    except Exception as exc:
        # Compilation stays best-effort (the caller still receives the expected
        # path), but the failure is reported instead of being silently dropped,
        # and KeyboardInterrupt/SystemExit are no longer swallowed.
        print('WARNING: LaTeX compilation of {} reported an error: {}'.format(pdf_file, exc))
    pdf_file += '.pdf'
    return pdf_file
def generate_roga(seq_list, genus, lab, source):
    """
    Generates PDF ROGA

    The document is assembled with pylatex: report header, identification
    summary, a genus-specific GeneSeekr table, sequence quality metrics,
    pipeline metadata and a "Verified by" form field. The PDF is written as
    ``<report_id>_<genus>_<date>`` relative to the current working directory.

    :param seq_list: List of OLC Seq IDs
    :param genus: Expected Genus for samples (Salmonella, Listeria, or Escherichia)
    :param lab: ID for lab report is being generated for
    :param source: string input for source that strains were derived from, i.e. 'ground beef'
    """
    # Grab combinedMetadata dataframes for each requested Seq ID
    metadata_reports = extract_report_data.get_combined_metadata(seq_list)

    # Date setup (read the clock once so date and year can never disagree)
    today = datetime.today()
    date = today.strftime('%Y-%m-%d')
    year = today.strftime('%Y')

    # Grab GDCS data for each requested Seq ID
    gdcs_reports = extract_report_data.get_gdcs(seq_list)
    gdcs_dict = extract_report_data.generate_gdcs_dict(gdcs_reports)

    # Page setup
    geometry_options = {
        "tmargin": "2cm",
        "lmargin": "1.8cm",
        "rmargin": "1.8cm",
        "headsep": "1cm",
    }
    doc = pl.Document(page_numbers=False, geometry_options=geometry_options)
    header = produce_header_footer()
    doc.preamble.append(header)
    doc.change_document_style("header")

    # DATABASE HANDLING
    report_id = update_db(date=date, year=year, genus=genus, lab=lab, source=source)

    # Validation flags get a default up front so they are always bound,
    # whichever genus branch runs below.
    all_uida = False
    all_vt = False
    all_mono = False
    all_enterica = False

    # SECOND VALIDATION SCREEN
    if genus == 'Escherichia':
        validated_ecoli_dict = extract_report_data.validate_ecoli(seq_list, metadata_reports)
        uida_list = []
        vt_list = []
        for key, value in validated_ecoli_dict.items():
            ecoli_uida_present = value[0]
            ecoli_vt_present = value[1]
            uida_list.append(ecoli_uida_present)
            vt_list.append(ecoli_vt_present)
            if not ecoli_uida_present:
                print('WARNING: uidA not present for {}. Cannot confirm E. coli.'.format(key))
            if not ecoli_vt_present:
                print('WARNING: vt marker not detected for {}. Cannot confirm strain is verotoxigenic.'
                      .format(key))
        all_uida = False not in uida_list
        all_vt = False not in vt_list
    elif genus == 'Listeria':
        validated_listeria_dict = extract_report_data.validate_mash(
            seq_list, metadata_reports, 'Listeria monocytogenes')
        all_mono = False not in list(validated_listeria_dict.values())
    elif genus == 'Salmonella':
        validated_salmonella_dict = extract_report_data.validate_mash(
            seq_list, metadata_reports, 'Salmonella enterica')
        all_enterica = False not in list(validated_salmonella_dict.values())

    # DOCUMENT BODY/CREATION
    with doc.create(pl.Section('Report of Genomic Analysis: ' + genus, numbering=False)):

        # REPORT ID
        doc.append(bold('Report ID: '))
        doc.append(report_id)

        # REPORTING LAB
        doc.append(bold('\nReporting laboratory: '))
        doc.append(lab)
        doc.append('\n\n')

        # LAB SUMMARY
        with doc.create(pl.Tabular('lcr', booktabs=True)) as table:
            table.add_row(bold('Laboratory'), bold('Address'), bold('Tel #'))
            table.add_row(lab, lab_info[lab][0], lab_info[lab][1])

        # TEXT SUMMARY
        with doc.create(pl.Subsection('Identification Summary', numbering=False)) as summary:
            summary.append(
                'Whole-genome sequencing analysis was conducted on {} presumptive '
                .format(len(metadata_reports)))
            summary.append(italic('{} '.format(genus)))
            if len(metadata_reports) == 1:
                summary.append('strain isolated from {}. '.format(source))
            else:
                summary.append('strains isolated from {}. '.format(source))

            if genus == 'Escherichia':
                if all_uida:
                    summary.append('All of the following strains are confirmed as ')
                    summary.append(italic('Escherichia coli '))
                    summary.append('based on 16S sequence and the presence of marker gene ')
                    summary.append(italic('uidA. '))
                else:
                    summary.append('Some of the following strains could not be confirmed to be ')
                    summary.append(italic('Escherichia coli '))
                    summary.append('as the ')
                    summary.append(italic('uidA '))
                    summary.append('marker gene was not detected. ')
                if all_vt:
                    summary.append(
                        'All strains are confirmed to be verotoxigenic based on presence of the ')
                    summary.append(italic('vt '))
                    summary.append('marker.')
            elif genus == 'Listeria':
                if all_mono:
                    summary.append('All of the following strains are confirmed to be ')
                    summary.append(italic('Listeria monocytogenes '))
                    summary.append('based on GeneSeekr analysis. ')
                else:
                    summary.append('Some of the following strains could not be confirmed to be ')
                    summary.append(italic('Listeria monocytogenes.'))
            elif genus == 'Salmonella':
                if all_enterica:
                    summary.append('All of the following strains are confirmed to be ')
                    summary.append(italic('Salmonella enterica '))
                    summary.append('based on GeneSeekr analysis. ')
                else:
                    summary.append('Some of the following strains could not be confirmed to be ')
                    summary.append(italic('Salmonella enterica.'))

        # NOTE: the table loops below used to rebind ``genus`` to each sample's
        # 16S genus. That value was never written to the table, but it leaked
        # into the later genus checks and into the output file name, so the
        # assignment has been removed and ``genus`` stays the requested genus.

        # ESCHERICHIA TABLE
        if genus == 'Escherichia':
            genesippr_table_columns = (
                bold('LSTS ID'),
                bold(pl.NoEscape(r'uidA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'Serotype')),
                bold(pl.NoEscape(r'Verotoxin Profile')),
                bold(pl.NoEscape(r'eae{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )
            with doc.create(pl.Subsection('GeneSeekr Analysis', numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()
                        # Select the sample's record once and read every column from it
                        row = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = row['SampleName'].values[0]

                        # Serotype, with the % identity removed
                        serotype = row['E_coli_Serotype'].values[0]
                        fixed_serotype = remove_bracketed_values(serotype)

                        # Verotoxin
                        verotoxin = row['Vtyper_Profile'].values[0]

                        # MLST/rMLST (str() guards against a numerically parsed column)
                        mlst = row['MLST_Result'].values[0]
                        rmlst = str(row['rMLST_Result'].values[0]).replace('-', 'New')

                        # Markers
                        marker_list = row['GeneSeekr_Profile'].values[0]
                        uida = '+' if 'uidA' in marker_list else '-'
                        eae = '+' if 'eae' in marker_list else '-'

                        table.add_row((lsts_id, uida, fixed_serotype, verotoxin, eae, mlst, rmlst))
                    table.add_hline()

                create_caption(genesippr_section, 'a',
                               "+ indicates marker presence : "
                               "- indicates marker was not detected")

        # LISTERIA TABLE
        elif genus == 'Listeria':
            genesippr_table_columns = (
                bold('LSTS ID'),
                bold(pl.NoEscape(r'IGS{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'hlyA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'inlJ{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )
            with doc.create(pl.Subsection('GeneSeekr Analysis', numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()
                        row = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = row['SampleName'].values[0]

                        # MLST/rMLST (str() guards against a numerically parsed column)
                        mlst = row['MLST_Result'].values[0]
                        rmlst = str(row['rMLST_Result'].values[0]).replace('-', 'New')

                        # Markers
                        marker_list = row['GeneSeekr_Profile'].values[0]
                        igs = '+' if 'IGS' in marker_list else '-'
                        hlya = '+' if 'hlyA' in marker_list else '-'
                        inlj = '+' if 'inlJ' in marker_list else '-'

                        table.add_row((lsts_id, igs, hlya, inlj, mlst, rmlst))
                    table.add_hline()

                create_caption(genesippr_section, 'a',
                               "+ indicates marker presence : "
                               "- indicates marker was not detected")

        # SALMONELLA TABLE
        elif genus == 'Salmonella':
            genesippr_table_columns = (
                bold('LSTS ID'),
                bold(pl.NoEscape(r'Serovar')),
                bold(pl.NoEscape(r'Serogroup{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'H1')),
                bold(pl.NoEscape(r'H2')),
                bold(pl.NoEscape(r'invA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'stn{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )
            with doc.create(pl.Subsection('GeneSeekr Analysis', numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()
                        row = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = row['SampleName'].values[0]

                        # MLST/rMLST (str() guards against a numerically parsed column)
                        mlst = row['MLST_Result'].values[0]
                        rmlst = str(row['rMLST_Result'].values[0]).replace('-', 'New')

                        # Serovar
                        serovar = row['SISTR_serovar'].values[0]

                        # SISTR Serogroup, H1, H2
                        sistr_serogroup = row['SISTR_serogroup'].values[0]
                        sistr_h1 = row['SISTR_h1'].values[0].strip(';')
                        sistr_h2 = row['SISTR_h2'].values[0].strip(';')

                        # Markers
                        marker_list = row['GeneSeekr_Profile'].values[0]
                        inva = '+' if 'invA' in marker_list else '-'
                        stn = '+' if 'stn' in marker_list else '-'

                        table.add_row((lsts_id, serovar, sistr_serogroup, sistr_h1, sistr_h2,
                                       inva, stn, mlst, rmlst))
                    table.add_hline()

                create_caption(genesippr_section, 'a',
                               "+ indicates marker presence : "
                               "- indicates marker was not detected")

        # SEQUENCE QUALITY METRICS
        sequence_quality_columns = (
            bold('LSTS ID'),
            bold(pl.NoEscape(r'Total Length')),
            bold(pl.NoEscape(r'Coverage')),
            bold(pl.NoEscape(r'GDCS')),
            bold(pl.NoEscape(r'Pass/Fail')),
        )

        # Create the sequence table
        with doc.create(pl.Subsection('Sequence Quality Metrics', numbering=False)):
            with doc.create(pl.Tabular('|c|c|c|c|c|')) as table:
                # Header
                table.add_hline()
                table.add_row(sequence_quality_columns)

                # Rows
                for sample_id, df in metadata_reports.items():
                    table.add_hline()
                    row = df.loc[df['SeqID'] == sample_id]

                    # Grab values
                    lsts_id = row['SampleName'].values[0]
                    total_length = row['TotalLength'].values[0]
                    average_coverage_depth = row['AverageCoverageDepth'].values[0]

                    # Fix coverage: drop any 'X' suffix, round to whole depth, re-append 'X'.
                    # str() keeps this working when the column was parsed as a number.
                    average_coverage_depth = format(
                        float(str(average_coverage_depth).replace('X', '')), '.0f')
                    average_coverage_depth = str(average_coverage_depth) + 'X'

                    # Matches
                    matches = gdcs_dict[sample_id][0]
                    passfail = gdcs_dict[sample_id][1]
                    if passfail == '+':
                        passfail = 'Pass'
                    elif passfail == '-':
                        passfail = 'Fail'

                    # Add row
                    table.add_row((lsts_id, total_length, average_coverage_depth, matches, passfail))
                table.add_hline()

        # Pipeline metadata table
        pipeline_metadata_columns = (bold('LSTS ID'),
                                     bold('Seq ID'),
                                     bold('Pipeline Version'),
                                     bold('Database Version'))
        with doc.create(pl.Subsection('Pipeline Metadata', numbering=False)):
            with doc.create(pl.Tabular('|c|c|c|c|')) as table:
                # Header
                table.add_hline()
                table.add_row(pipeline_metadata_columns)

                # Rows
                for sample_id, df in metadata_reports.items():
                    table.add_hline()
                    row = df.loc[df['SeqID'] == sample_id]

                    # LSTS ID
                    lsts_id = row['SampleName'].values[0]

                    # Pipeline version; the database version has been harmonized with it
                    pipeline_version = row['PipelineVersion'].values[0]
                    database_version = pipeline_version

                    # Add row
                    table.add_row((lsts_id, sample_id, pipeline_version, database_version))
                table.add_hline()

        # VERIFIED BY
        with doc.create(pl.Subsubsection('Verified by:', numbering=False)):
            with doc.create(Form()):
                doc.append(pl.Command('noindent'))
                doc.append(
                    pl.Command('TextField',
                               options=[
                                   "name=multilinetextbox",
                                   "multiline=false",
                                   pl.NoEscape("bordercolor=0 0 0"),
                                   pl.NoEscape("width=2.5in"),
                                   "height=0.3in",
                               ],
                               arguments=''))

    doc.generate_pdf('{}_{}_{}'.format(report_id, genus, date), clean_tex=False)
def report(self, outfile=None):
    """
    Render the derivation as a LaTeX document and compile it with pdflatex.

    The document has two sections: "Results" (rate equation, simplifications,
    substitutions and the resulting numerator/denominator) and "Full report"
    (input data, parsed reactions, the linear graph and kinetic matrices, the
    King-Altman patterns and the directed patterns).

    :param outfile: name of the generated file, placed in ``sys.path[0]``.
        When falsy, "KingAltman sln of" + the input file name is used.
    """
    # Local import so the block does not rely on a module-level ``import os``.
    import os

    class AllTT(Environment):
        packages = [Package('alltt')]
        escape = False
        content_separator = "\n"

    class Amsmath(Environment):
        packages = [Package('amsmath')]
        escape = False
        content_separator = "\n"

    class Align(Environment):
        packages = [Package('amsmath')]
        escape = False
        content_separator = "\n"

    class Breqn(Environment):
        packages = [Package('breqn')]
        escape = False
        content_separator = "\n"

    def equation(numbering=True):
        """Return an ``equation`` environment (``equation*`` when unnumbered)."""
        env = Amsmath()
        env._latex_name = "equation" + ("" if numbering else "*")
        return env

    def dmath(numbering=True):
        """Return a breqn ``dmath`` environment (``dmath*`` when unnumbered)."""
        env = Breqn()
        env._latex_name = "dmath" + ("" if numbering else "*")
        return env

    def bracket_matrix(rows):
        """Build a math block holding *rows* as a ``b``-type matrix; falsy cells become empty."""
        cells = [[x.as_latex() if x else "" for x in row] for row in rows]
        # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` returns
        # the same array on every pandas release.
        matrix = DataFrame(cells).values
        return pylatex.Math(data=[pylatex.Matrix(matrix, mtype="b")])

    align_s = Align()
    align_s._latex_name = "align*"

    doc = pylatex.Document('article')
    doc.packages.append(Package('booktabs'))

    with doc.create(pylatex.Section("Results")):
        doc.append("Product forming complex\n")
        dp_dt = equation(numbering=False)
        producing_terms = "+".join(
            [e.as_latex() + r.as_latex() for e, r in self._product_forming_complex])
        consuming_terms = "-".join(
            [e.as_latex() + r.as_latex() for e, r in self._product_consuming_complex])
        dp_dt.append(
            f"\\frac{{dP}}{{dT}} = \\frac{{{producing_terms}-{consuming_terms}}}{{\\sum}}")
        doc.append(dp_dt)

        doc.append("Simplifications:\n")
        rates = equation(numbering=False)
        rates.append(",".join([sympy.latex(x) for x in self._null_rates]) + " = 0")
        doc.append(rates)

        doc.append("Substitutions")
        for x in self._substitutions:
            term = equation(numbering=False)
            term.append(f'{sympy.latex(x[0])} = {sympy.latex(x[1])}')
            doc.append(term)

        doc.append("Resulting equation")
        resulting = equation(numbering=False)
        resulting.append(r"\frac{dP}{dT} = v = \frac{N}{D}")
        doc.append(resulting)

        doc.append("where")
        n, d = sympy.fraction(self.substitute())
        numerator = dmath(numbering=False)
        numerator.append("N = " + sympy.latex(n))
        doc.append(numerator)

        doc.append("and")
        denominator = dmath(numbering=False)
        denominator.append("D = " + sympy.latex(d))
        doc.append(denominator)

    with doc.create(pylatex.Section("Full report")):
        with doc.create(pylatex.Subsection("Input data")):
            doc.append("Input file name: ")
            with doc.create(AllTT()):
                doc.append(f'{self._report["infile"]}')
            doc.append("File contents:")
            with doc.create(AllTT()):
                doc.append(self._report["input"])

        with doc.create(pylatex.Subsection("Parsed reactions")):
            doc.append("Reactions after parsing: \n")
            with doc.create(align_s):
                for el in self._report["Reactions"]:
                    doc.append(el + "\\\\")

        with doc.create(pylatex.Subsection("Linear graph matrix")):
            doc.append(bracket_matrix(self._report["lin_graph_matrix"]))

        with doc.create(pylatex.Subsection("Kinetic matrix")):
            doc.append(bracket_matrix(self._report["kin_matrix"]))

        with doc.create(pylatex.Subsection("King-Altman Patterns")):
            # Materialise each row as a list rather than handing lazy map objects to DataFrame.
            patterns = [[x.as_latex(add_math_mode=True) for x in el]
                        for el in self._report["kaPatterns"]]
            doc.append(pylatex.NoEscape(
                DataFrame(patterns).to_latex(escape=False, header=False)))

        # Type of self._report["directed_patterns"] = List[[Enzymestate, 2dMatrix_for_enzymestate]]
        with doc.create(pylatex.Subsection("Directed Patterns")):
            for el in self._report["directed_patterns"]:
                with doc.create(pylatex.Subsubsection(f"Directed Pattern for {el[0]}")):
                    table = [[y.as_latex(add_math_mode=True) for y in list_of_reac]
                             for list_of_reac in el[1]]
                    la_table = DataFrame(table).to_latex(escape=False, header=False)
                    doc.append(pylatex.NoEscape(la_table))

    if not outfile:
        outfile = "KingAltman sln of" + self._report["infile"]
    # os.path.join picks the platform's separator; the previous hard-coded "\\"
    # only produced a valid path on Windows.
    doc.generate_pdf(os.path.join(sys.path[0], outfile), clean_tex=False, compiler='pdflatex')
    print("generated outfile")