def save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger):
    """Write per-assembly interspecies translocation tables and draw the summary plot.

    For every assembly whose entry in ``results`` carries translocation data,
    a reference-by-reference table is written to
    ``interspecies_translocations_by_refs_<assembly name>.info`` in ``output_dir``.
    When ``qconfig.draw_plots`` is set, per-reference totals (translocations
    plus potential misassemblies) are handed to ``plotter.draw_meta_summary_plot``.

    results -- one item per assembly; a falsy item means "no data", otherwise
        it is indexed by 'istranslocations_by_refs' and
        'potential_misassemblies_by_refs'
    contigs_fpaths -- assembly file paths, indexed in parallel with ``results``
    ref_labels_by_chromosomes -- mapping whose values are reference labels
    output_dir -- directory receiving the .info files and the plot
    logger -- object whose ``info`` method reports every written file
    """
    ref_misassemblies = [res['istranslocations_by_refs'] if res else [] for res in results]
    potential_misassemblies_by_refs = [res['potential_misassemblies_by_refs'] if res else []
                                       for res in results]
    all_refs = sorted(set(ref_labels_by_chromosomes.values()))

    # First row is the header; one totals row per assembly follows.
    misassemblies_by_refs_rows = [{'metricName': 'References', 'values': all_refs}]

    if ref_misassemblies:
        for asm_idx, fpath in enumerate(contigs_fpaths):
            totals_row = {'metricName': qutils.label_from_fpath(fpath), 'values': []}
            misassemblies_by_refs_rows.append(totals_row)

            translocations = ref_misassemblies[asm_idx]
            if not translocations:
                # Totals row stays empty; the plotting step below drops it.
                continue

            assembly_name = qutils.name_from_fpath(fpath)
            # Header of the .info table numbers the references from 1.
            all_rows = [{'metricName': 'References',
                         'values': list(range(1, len(all_refs) + 1))}]
            for k in all_refs:
                values = []
                for ref in all_refs:
                    if ref != k and ref in translocations:
                        values.append(translocations[ref][k])
                    else:
                        # Diagonal cell or reference without data.
                        values.append(None)
                found = sum(v for v in values if v)
                totals_row['values'].append(
                    max(0, found + potential_misassemblies_by_refs[asm_idx][k]))
                all_rows.append({'metricName': k, 'values': values})

            info_fpath = os.path.join(
                output_dir, 'interspecies_translocations_by_refs_%s.info' % assembly_name)
            with open(info_fpath, 'w') as info_file:
                info_file.write('Number of interspecies translocations by references: \n')
            # print_file appends the table below the title line written above.
            print_file(all_rows, info_fpath, append_to_existing_file=True)
            with open(info_fpath, 'a') as info_file:
                info_file.write('References:\n')
                for ref_num, ref in enumerate(all_refs, 1):
                    info_file.write(''.join((str(ref_num), ' - ', ref, '\n')))
            logger.info(' Information about interspecies translocations by references for %s is saved to %s' %
                        (assembly_name, info_fpath))

    misassemblies = []
    if qconfig.draw_plots:
        from quast_libs import plotter

        aligned_contigs_labels = []
        # Iterate over a snapshot: rows without values are removed from the
        # live list, which is later passed to the plotter.
        for row in tuple(misassemblies_by_refs_rows[1:]):
            if not row['values']:
                misassemblies_by_refs_rows.remove(row)
                continue
            aligned_contigs_labels.append(row['metricName'])

        # Transpose: one list per reference, holding a value for each remaining assembly.
        filled_rows = [row for row in misassemblies_by_refs_rows[1:] if row['values']]
        for ref_idx in range(len(all_refs)):
            misassemblies.append([row['values'][ref_idx] for row in filled_rows])

        is_translocations_plot_fpath = os.path.join(
            output_dir, 'intergenomic_misassemblies.' + qconfig.plot_extension)
        plotter.draw_meta_summary_plot(
            '', output_dir, aligned_contigs_labels, all_refs, misassemblies_by_refs_rows,
            misassemblies, is_translocations_plot_fpath,
            title='Intergenomic misassemblies (found and supposed)',
            reverse=False, yaxis_title=None, print_all_refs=True)
def do(html_fpath, output_dirpath, combined_output_dirpath, output_dirpath_per_ref, metrics,
       misassembly_metrics, ref_names):
    """Save per-metric summaries (TXT, TSV, TEX) and plots across references.

    Creates the 'TXT', 'TEX', 'TSV' and plot sub-directories of
    ``output_dirpath`` when missing. For every non-tuple metric it collects
    results with ``get_results_for_metric``, writes the transposed table in
    the three text formats and draws a summary plot. For the misassemblies
    metric it additionally draws one misassemblies plot per assembly and,
    when ``qconfig.html_report`` is set, saves the points to the HTML report.

    html_fpath -- forwarded to the plotter and to ``html_saver``
    output_dirpath -- root directory for the generated summary files
    combined_output_dirpath -- directory the assembly labels are read from
    output_dirpath_per_ref -- forwarded to ``get_results_for_metric``
    metrics -- metric names; tuple items are skipped
    misassembly_metrics -- metric names whose leading ``reporting.Fields.TAB``
        prefix is cut off before lookup
    ref_names -- reference names; a trailing ``qconfig.not_aligned_name``
        entry is left out of the misassemblies plots
    """
    labels = get_labels(combined_output_dirpath, qconfig.report_prefix + '.tsv')
    contigs_num = len(labels)
    plots_dirname = qconfig.plot_extension.upper()
    for ext in ['TXT', plots_dirname, 'TEX', 'TSV']:
        if not os.path.isdir(os.path.join(output_dirpath, ext)):
            os.mkdir(os.path.join(output_dirpath, ext))

    fields = reporting.Fields
    for metric in metrics:
        if isinstance(metric, tuple):
            continue

        summary_txt_fpath = os.path.join(output_dirpath, 'TXT', metric.replace(' ', '_') + '.txt')
        summary_tex_fpath = os.path.join(output_dirpath, 'TEX', metric.replace(' ', '_') + '.tex')
        summary_tsv_fpath = os.path.join(output_dirpath, 'TSV', metric.replace(' ', '_') + '.tsv')
        summary_plot_fpath = os.path.join(output_dirpath, plots_dirname, metric.replace(' ', '_'))

        results, all_rows, cur_ref_names = get_results_for_metric(
            ref_names, metric, contigs_num, labels, output_dirpath_per_ref,
            qconfig.transposed_report_prefix + '.tsv')
        if not results or not results[0]:
            continue
        if not cur_ref_names:
            continue

        # all_rows[0] is the header row; the remaining rows are read per assembly.
        header_row = all_rows[0]
        assembly_rows = all_rows[1:]
        transposed_table = [{
            'metricName': 'Assemblies',
            'values': [row['metricName'] for row in assembly_rows],
        }]
        for col, ref_name in enumerate(header_row['values']):
            transposed_table.append({
                'metricName': ref_name,  # name of reference
                'values': [row['values'][col] for row in assembly_rows],
            })

        print_file(transposed_table, summary_txt_fpath)
        reporting.save_tsv(summary_tsv_fpath, transposed_table)
        reporting.save_tex(summary_tex_fpath, transposed_table)

        reverse = True if reporting.get_quality(metric) == fields.Quality.MORE_IS_BETTER else False

        y_label = None
        if metric in (fields.TOTALLEN,
                      fields.TOTALLENS__FOR_1000_THRESHOLD,
                      fields.TOTALLENS__FOR_10000_THRESHOLD,
                      fields.TOTALLENS__FOR_50000_THRESHOLD):
            y_label = 'Total length'
        elif metric == fields.TOTAL_ALIGNED_LEN:
            y_label = 'Aligned length'
        elif metric in (fields.LARGCONTIG, fields.N50, fields.NGA50, fields.MIS_EXTENSIVE_BASES):
            y_label = 'Contig length'
        elif metric == fields.LARGALIGN:
            y_label = 'Alignment length'

        plotter.draw_meta_summary_plot(
            html_fpath, output_dirpath, labels, cur_ref_names, results, summary_plot_fpath,
            title=metric, reverse=reverse, yaxis_title=y_label, print_all_refs=True,
            logger=logger)

        if metric != fields.MISASSEMBL:
            continue

        # Misassemblies additionally get one detailed plot per assembly.
        mis_results = []
        report_fname = os.path.join(
            'contigs_reports', qconfig.transposed_report_prefix + '_misassemblies' + '.tsv')
        if ref_names[-1] == qconfig.not_aligned_name:
            cur_ref_names = ref_names[:-1]
        for misassembly_metric in misassembly_metrics:
            # cur_ref_names is deliberately rebound by every lookup.
            results, all_rows, cur_ref_names = get_results_for_metric(
                cur_ref_names, misassembly_metric[len(fields.TAB):], contigs_num, labels,
                output_dirpath_per_ref, report_fname)
            if results:
                mis_results.append(results)
        if not mis_results:
            continue

        json_points = []
        for contig_num in range(contigs_num):
            label = labels[contig_num]
            plot_fpath = os.path.join(
                output_dirpath, plots_dirname, qutils.slugify(label) + '_misassemblies')
            points = plotter.draw_meta_summary_misassemblies_plot(
                mis_results, cur_ref_names, contig_num, plot_fpath, title=labels[contig_num])
            json_points.append(points)

        if qconfig.html_report:
            from quast_libs.html_saver import html_saver

            if ref_names[-1] == qconfig.not_aligned_name:
                cur_ref_names = ref_names[:-1]
            if json_points:
                html_saver.save_meta_misassemblies(
                    html_fpath, output_dirpath, json_points, labels, cur_ref_names)

    logger.main_info('')
    logger.main_info(
        ' Text versions of reports and plots for each metric (for all references and assemblies) are saved to ' +
        output_dirpath + '/')
def save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger):
    """Write per-assembly interspecies translocation tables and draw the summary plot.

    Fills ``intergenomic_misassemblies_by_asm[label]`` with the per-reference
    misassembly lists of every assembly, writes
    ``interspecies_translocations_by_refs_<assembly name>.info`` files into
    ``output_dir`` for assemblies that have translocation data and, when
    ``qconfig.draw_plots`` is set, draws the intergenomic misassemblies plot.

    results -- one item per assembly; a falsy item means "no data", otherwise
        it is indexed by 'istranslocations_by_refs' and 'misassemblies_by_ref'
    contigs_fpaths -- assembly file paths, indexed in parallel with ``results``
    ref_labels_by_chromosomes -- mapping whose values are reference labels
    output_dir -- directory receiving the .info files and the plot
    logger -- used for progress messages and forwarded to the plotter

    Returns None; returns early, before any file is written, when ``results``
    yields no items.
    """
    istranslocations_by_asm = [res['istranslocations_by_refs'] if res else None for res in results]
    misassemblies_by_asm = [res['misassemblies_by_ref'] if res else None for res in results]

    # Unique reference labels in first-seen order, sorted unless the input order is requested.
    all_refs = []
    for ref_label in ref_labels_by_chromosomes.values():
        if ref_label in all_refs:
            continue
        all_refs.append(ref_label)
    if not qconfig.use_input_ref_order:
        all_refs.sort()

    misassemblies_by_refs_rows = [{'metricName': 'References', 'values': all_refs}]
    if not istranslocations_by_asm:
        return

    for asm_idx, fpath in enumerate(contigs_fpaths):
        label = qutils.label_from_fpath(fpath)
        totals_row = {'metricName': label, 'values': []}
        misassemblies_by_refs_rows.append(totals_row)
        istranslocations_by_ref = istranslocations_by_asm[asm_idx]
        misassemblies_by_ref = misassemblies_by_asm[asm_idx]

        intergenomic_misassemblies_by_asm[label] = defaultdict(list)
        per_ref = intergenomic_misassemblies_by_asm[label]
        for ref in all_refs:
            per_ref[ref] = misassemblies_by_ref[ref] if misassemblies_by_ref else []

        if not istranslocations_by_ref:
            # Totals row stays empty and is skipped by the plotting step.
            continue

        assembly_name = qutils.name_from_fpath(fpath)
        # Header of the .info table numbers the references from 1.
        all_rows = [{'metricName': 'References', 'values': list(range(1, len(all_refs) + 1))}]
        for ref in all_refs:
            values = []
            for second_ref in all_refs:
                # NOTE(review): membership is tested for second_ref, while the lookup
                # indexes [ref] first - presumably the mapping is square; confirm.
                if ref != second_ref and second_ref in istranslocations_by_ref:
                    values.append(istranslocations_by_ref[ref][second_ref])
                else:
                    values.append(None)

            possible_misassemblies = 0
            if misassemblies_by_ref:
                possible_misassemblies = misassemblies_by_ref[ref].count(
                    Misassembly.POSSIBLE_MISASSEMBLIES)
            istranslocations = max(0, sum(v for v in values if v))
            totals_row['values'].append(istranslocations + possible_misassemblies)
            all_rows.append({'metricName': ref, 'values': values})

        info_fpath = os.path.join(
            output_dir, 'interspecies_translocations_by_refs_%s.info' % assembly_name)
        with open(info_fpath, 'w') as info_file:
            info_file.write('Number of interspecies translocations by references: \n')
        # print_file appends the table below the title line written above.
        print_file(all_rows, info_fpath, append_to_existing_file=True)
        with open(info_fpath, 'a') as info_file:
            info_file.write('References:\n')
            for ref_num, ref in enumerate(all_refs, 1):
                info_file.write(''.join((str(ref_num), ' - ', ref, '\n')))
        logger.info(' Information about interspecies translocations by references for %s is saved to %s' %
                    (assembly_name, info_fpath))

    misassemblies = []
    if qconfig.draw_plots:
        from quast_libs import plotter

        aligned_contigs_labels = []
        # Iterate over a snapshot so rows can be removed from the live list.
        for row in tuple(misassemblies_by_refs_rows[1:]):
            if not row['values']:
                misassemblies_by_refs_rows.remove(row)
                continue
            aligned_contigs_labels.append(row['metricName'])

        # Transpose: one list per reference, holding a value for each remaining assembly.
        filled_rows = [row for row in misassemblies_by_refs_rows[1:] if row['values']]
        for ref_idx in range(len(all_refs)):
            misassemblies.append([row['values'][ref_idx] for row in filled_rows])

        is_translocations_plot_fpath = os.path.join(output_dir, 'intergenomic_misassemblies')
        plotter.draw_meta_summary_plot(
            '', output_dir, aligned_contigs_labels, all_refs, misassemblies,
            is_translocations_plot_fpath,
            title='Intergenomic misassemblies (found and supposed)',
            reverse=False, yaxis_title=None, print_all_refs=True, logger=logger)
def save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes,
                            output_dir, logger):
    """Report interspecies translocations per assembly and plot their totals.

    For each assembly the per-reference misassembly lists are stored in
    ``intergenomic_misassemblies_by_asm``. Assemblies with translocation data
    also get an ``interspecies_translocations_by_refs_<assembly name>.info``
    file in ``output_dir``. If ``qconfig.draw_plots`` is set, the totals
    (translocations plus possible misassemblies) are plotted.

    results -- per-assembly items; falsy means "no data", otherwise the item
        is indexed by 'istranslocations_by_refs' and 'misassemblies_by_ref'
    contigs_fpaths -- assembly file paths, indexed in parallel with ``results``
    ref_labels_by_chromosomes -- mapping whose values are reference labels
    output_dir -- destination directory for the .info files and the plot
    logger -- used for progress messages and forwarded to the plotter

    Returns None; nothing is written when ``results`` yields no items.
    """
    istranslocations_by_asm = [
        item['istranslocations_by_refs'] if item else None for item in results
    ]
    misassemblies_by_asm = [
        item['misassemblies_by_ref'] if item else None for item in results
    ]

    # De-duplicate the labels while keeping their first-seen order.
    all_refs = []
    for candidate in ref_labels_by_chromosomes.values():
        if candidate not in all_refs:
            all_refs.append(candidate)
    if not qconfig.use_input_ref_order:
        all_refs.sort()

    header_row = {'metricName': 'References', 'values': all_refs}
    misassemblies_by_refs_rows = [header_row]
    if not istranslocations_by_asm:
        return

    ref_numbers = [position + 1 for position in range(len(all_refs))]

    for i, fpath in enumerate(contigs_fpaths):
        label = qutils.label_from_fpath(fpath)
        asm_row = {'metricName': label, 'values': []}
        misassemblies_by_refs_rows.append(asm_row)
        istranslocations_by_ref = istranslocations_by_asm[i]
        misassemblies_by_ref = misassemblies_by_asm[i]

        intergenomic_misassemblies_by_asm[label] = defaultdict(list)
        for ref in all_refs:
            if misassemblies_by_ref:
                intergenomic_misassemblies_by_asm[label][ref] = misassemblies_by_ref[ref]
            else:
                intergenomic_misassemblies_by_asm[label][ref] = []

        if istranslocations_by_ref:
            assembly_name = qutils.name_from_fpath(fpath)
            all_rows = [{'metricName': 'References', 'values': list(ref_numbers)}]
            for ref in all_refs:
                # NOTE(review): the guard checks second_ref in the outer mapping but
                # the lookup goes through [ref] - presumably both always exist; confirm.
                cells = [
                    None
                    if (ref == second_ref or second_ref not in istranslocations_by_ref)
                    else istranslocations_by_ref[ref][second_ref]
                    for second_ref in all_refs
                ]
                if misassemblies_by_ref:
                    possible_misassemblies = misassemblies_by_ref[ref].count(
                        Misassembly.POSSIBLE_MISASSEMBLIES)
                else:
                    possible_misassemblies = 0
                istranslocations = max(0, sum(cell for cell in cells if cell))
                asm_row['values'].append(istranslocations + possible_misassemblies)
                all_rows.append({'metricName': ref, 'values': cells})

            misassembly_by_ref_fpath = os.path.join(
                output_dir,
                'interspecies_translocations_by_refs_%s.info' % assembly_name)
            with open(misassembly_by_ref_fpath, 'w') as out:
                out.write('Number of interspecies translocations by references: \n')
            # The table itself is appended by print_file, then the legend follows.
            print_file(all_rows, misassembly_by_ref_fpath,
                       append_to_existing_file=True)
            with open(misassembly_by_ref_fpath, 'a') as out:
                out.write('References:\n')
                for ref_number, ref in zip(ref_numbers, all_refs):
                    out.write(str(ref_number) + ' - ' + ref + '\n')
            logger.info(
                ' Information about interspecies translocations by references for %s is saved to %s'
                % (assembly_name, misassembly_by_ref_fpath))

    misassemblies = []
    if qconfig.draw_plots:
        from quast_libs import plotter

        aligned_contigs_labels = []
        # The slice is a copy, so removing rows from the list itself is safe here.
        for row in misassemblies_by_refs_rows[1:]:
            if row['values']:
                aligned_contigs_labels.append(row['metricName'])
            else:
                misassemblies_by_refs_rows.remove(row)

        # One list per reference with the total of every assembly that has data.
        for ref_idx in range(len(all_refs)):
            misassemblies.append([
                row['values'][ref_idx]
                for row in misassemblies_by_refs_rows[1:]
                if row['values']
            ])

        is_translocations_plot_fpath = os.path.join(
            output_dir, 'intergenomic_misassemblies')
        plotter.draw_meta_summary_plot(
            '',
            output_dir,
            aligned_contigs_labels,
            all_refs,
            misassemblies,
            is_translocations_plot_fpath,
            title='Intergenomic misassemblies (found and supposed)',
            reverse=False,
            yaxis_title=None,
            print_all_refs=True,
            logger=logger)
def do(html_fpath, output_dirpath, combined_output_dirpath, output_dirpath_per_ref, metrics,
       misassembl_metrics, ref_names):
    """Save per-metric summaries (TXT, TSV, TEX) and plots across references.

    Creates the 'TXT', 'TEX', 'TSV' and plot sub-directories of
    ``output_dirpath`` when missing. Each non-tuple metric is looked up with
    ``get_results_for_metric``, its transposed table is written in the three
    text formats and a summary plot is drawn. The misassemblies metric also
    produces one plot per assembly and, if ``qconfig.html_report`` is set,
    the plotted points are saved to the HTML report.

    html_fpath -- forwarded to the plotter and to ``html_saver``
    output_dirpath -- root directory for the generated summary files
    combined_output_dirpath -- directory the assembly labels are read from
    output_dirpath_per_ref -- forwarded to ``get_results_for_metric``
    metrics -- metric names; tuple items are skipped
    misassembl_metrics -- metric names whose leading ``reporting.Fields.TAB``
        prefix is cut off before lookup
    ref_names -- reference names; a trailing ``qconfig.not_aligned_name``
        entry is left out of the misassemblies plots
    """
    labels = get_labels(combined_output_dirpath, qconfig.report_prefix + '.tsv')
    contigs_num = len(labels)
    plots_dirname = qconfig.plot_extension.upper()
    for ext in ['TXT', plots_dirname, 'TEX', 'TSV']:
        if not os.path.isdir(os.path.join(output_dirpath, ext)):
            os.mkdir(os.path.join(output_dirpath, ext))

    fields = reporting.Fields
    for metric in metrics:
        if isinstance(metric, tuple):
            continue

        summary_txt_fpath = os.path.join(output_dirpath, 'TXT', metric.replace(' ', '_') + '.txt')
        summary_tex_fpath = os.path.join(output_dirpath, 'TEX', metric.replace(' ', '_') + '.tex')
        summary_tsv_fpath = os.path.join(output_dirpath, 'TSV', metric.replace(' ', '_') + '.tsv')
        summary_png_fpath = os.path.join(
            output_dirpath, plots_dirname,
            metric.replace(' ', '_') + '.' + qconfig.plot_extension)

        results, all_rows, cur_ref_names = get_results_for_metric(
            ref_names, metric, contigs_num, labels, output_dirpath_per_ref,
            qconfig.transposed_report_prefix + '.tsv')
        if not results or not results[0]:
            continue
        if not cur_ref_names:
            continue

        # all_rows[0] is the header row; the remaining rows are read per assembly.
        header_row = all_rows[0]
        assembly_rows = all_rows[1:]
        transposed_table = [{
            'metricName': 'Assemblies',
            'values': [row['metricName'] for row in assembly_rows],
        }]
        for col, ref_name in enumerate(header_row['values']):
            transposed_table.append({
                'metricName': ref_name,  # name of reference
                'values': [row['values'][col] for row in assembly_rows],
            })

        print_file(transposed_table, summary_txt_fpath)
        reporting.save_tsv(summary_tsv_fpath, transposed_table)
        reporting.save_tex(summary_tex_fpath, transposed_table)

        reverse = True if reporting.get_quality(metric) == fields.Quality.MORE_IS_BETTER else False

        y_label = None
        if metric == fields.TOTALLEN:
            y_label = 'Total length '
        elif metric == fields.TOTAL_ALIGNED_LEN:
            y_label = 'Aligned length '
        elif metric in (fields.LARGCONTIG, fields.N50, fields.NGA50, fields.MIS_EXTENSIVE_BASES):
            y_label = 'Contig length '
        elif metric == fields.LARGALIGN:
            y_label = 'Alignment length '

        plotter.draw_meta_summary_plot(
            html_fpath, output_dirpath, labels, cur_ref_names, all_rows, results,
            summary_png_fpath, title=metric, reverse=reverse, yaxis_title=y_label)

        if metric != fields.MISASSEMBL:
            continue

        # Misassemblies additionally get one detailed plot per assembly.
        mis_results = []
        report_fname = os.path.join(
            'contigs_reports', qconfig.transposed_report_prefix + '_misassemblies' + '.tsv')
        if ref_names[-1] == qconfig.not_aligned_name:
            cur_ref_names = ref_names[:-1]
        for misassembl_metric in misassembl_metrics:
            # cur_ref_names is deliberately rebound by every lookup.
            results, all_rows, cur_ref_names = get_results_for_metric(
                cur_ref_names, misassembl_metric[len(fields.TAB):], contigs_num, labels,
                output_dirpath_per_ref, report_fname)
            if results:
                mis_results.append(results)
        if not mis_results:
            continue

        json_points = []
        for contig_num in range(contigs_num):
            label = labels[contig_num]
            plot_fpath = os.path.join(
                output_dirpath, plots_dirname, qutils.slugify(label) + '_misassemblies')
            points = plotter.draw_meta_summary_misassembl_plot(
                mis_results, cur_ref_names, contig_num, plot_fpath, title=labels[contig_num])
            json_points.append(points)

        if qconfig.html_report:
            from quast_libs.html_saver import html_saver

            if ref_names[-1] == qconfig.not_aligned_name:
                cur_ref_names = ref_names[:-1]
            if json_points:
                html_saver.save_meta_misassemblies(
                    html_fpath, output_dirpath, json_points, labels, cur_ref_names)

    logger.main_info('')
    logger.main_info(
        ' Text versions of reports and plots for each metric (for all references and assemblies) are saved to ' +
        output_dirpath + '/')