def correct_assemblies(contigs_fpaths, output_dirpath, labels):
    """Correct the input assemblies with min-contig filtration disabled.

    Calls ``qutils.correct_contigs`` with
    ``<output_dirpath>/<qconfig.corrected_dirname>`` as the target directory
    while ``qconfig.min_contig`` is temporarily set to 0, then wraps every
    returned "old" contigs path in an ``Assembly``.

    When ``qconfig.draw_plots`` or ``qconfig.html_report`` is set, the
    assemblies' paths and labels are passed to
    ``plotter.save_colors_and_ls``.

    :param contigs_fpaths: paths of the assemblies to correct
    :param output_dirpath: directory under which the corrected dir is placed
    :param labels: labels forwarded to ``qutils.correct_contigs``
    :return: tuple ``(assemblies, corrected_labels)`` where
        ``corrected_labels[i] == assemblies[i].label``
    """
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    # we need correction but do not need min-contig filtration
    saved_min_contig = qconfig.min_contig
    qconfig.min_contig = 0
    try:
        # only the "old" paths are used below; the corrected ones are ignored
        _, old_contigs_fpaths = qutils.correct_contigs(
            contigs_fpaths, corrected_dirpath, labels, reporting=None)
    finally:
        # restore the global setting even if the correction step fails,
        # otherwise later stages would silently run with min_contig == 0
        qconfig.min_contig = saved_min_contig

    assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath))
                  for fpath in old_contigs_fpaths]
    corrected_labels = [asm.label for asm in assemblies]

    if qconfig.draw_plots or qconfig.html_report:
        from quast_libs import plotter
        corr_fpaths = [asm.fpath for asm in assemblies]
        # hand the plotter its own copy so the returned list stays independent
        plotter.save_colors_and_ls(corr_fpaths, labels=list(corrected_labels))

    return assemblies, corrected_labels
def correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting):
    """Pre-process every assembly in parallel and collect the resulting paths.

    ``parallel_correct_contigs`` is run once per input file through joblib.
    Each job is expected to return a 4-tuple
    ``(old_fpaths, corr_fpaths, broken_scaffold_fpaths, logs)`` in which
    ``corr_fpaths`` and ``broken_scaffold_fpaths`` hold ``(fpath, lengths)``
    pairs.

    Side effects:
      * ``qconfig.max_threads`` is set to 1 if it was ``None``;
      * ``qconfig.assembly_labels_by_fpath`` gets an entry for every path
        (broken-scaffold paths are labelled ``label + '_broken'``);
      * ``add_lengths_to_report`` is called for every corrected and
        broken-scaffold file;
      * if plots or the HTML report are enabled and
        ``plotter.dict_color_and_ls`` is still empty,
        ``plotter.save_colors_and_ls`` is called with the corrected paths.

    :param contigs_fpaths: paths of the assemblies to pre-process
    :param corrected_dirpath: directory passed through to the worker
    :param labels: labels, indexed in parallel with ``contigs_fpaths``
    :param reporting: passed through to ``add_lengths_to_report``
    :return: tuple ``(corrected_contigs_fpaths, old_contigs_fpaths)``;
        broken-scaffold paths are appended to both lists
    """
    ## removing from contigs' names special characters because:
    ## 1) Some embedded tools can fail on some strings with "...", "+", "-", etc
    ## 2) Nucmer fails on names like "contig 1_bla_bla", "contig 2_bla_bla"
    ##    (it interprets as a contig's name only the first word of caption
    ##    and gets ambiguous contigs names)

    if qconfig.max_threads is None:
        qconfig.max_threads = 1

    # joblib rejects n_jobs == 0, which is what min() yields for an empty
    # input list; map only that case to 1 so other values keep their meaning
    n_jobs = min(len(contigs_fpaths), qconfig.max_threads) or 1

    if is_python2():
        from joblib import Parallel, delayed
    else:
        from joblib3 import Parallel, delayed

    logger.main_info(' Pre-processing...')
    corrected_info = Parallel(n_jobs=n_jobs)(
        delayed(parallel_correct_contigs)(i, contigs_fpath, corrected_dirpath, labels)
        for i, contigs_fpath in enumerate(contigs_fpaths))

    corrected_contigs_fpaths = []
    old_contigs_fpaths = []
    labels_by_fpath = qconfig.assembly_labels_by_fpath

    # results come back in input order, so the index selects the matching label
    for contig_idx, (old_fpaths, corr_fpaths, broken_scaffold_fpaths, logs) in enumerate(corrected_info):
        label = labels[contig_idx]
        logger.main_info('\n'.join(logs))

        for old_fpath in old_fpaths:
            old_contigs_fpaths.append(old_fpath)
            labels_by_fpath[old_fpath] = label

        for corr_fpath, lengths in corr_fpaths:
            corrected_contigs_fpaths.append(corr_fpath)
            labels_by_fpath[corr_fpath] = label
            add_lengths_to_report(lengths, reporting, corr_fpath)

        # a broken-scaffold file goes into both result lists
        for broken_fpath, lengths in broken_scaffold_fpaths:
            old_contigs_fpaths.append(broken_fpath)
            corrected_contigs_fpaths.append(broken_fpath)
            labels_by_fpath[broken_fpath] = label + '_broken'
            add_lengths_to_report(lengths, reporting, broken_fpath)

    if qconfig.draw_plots or qconfig.html_report:
        from quast_libs import plotter
        # do not overwrite colors/line styles that were already assigned
        if not plotter.dict_color_and_ls:
            plotter.save_colors_and_ls(corrected_contigs_fpaths)

    return corrected_contigs_fpaths, old_contigs_fpaths
def correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting):
    """Run contig pre-processing for all assemblies and gather the outputs.

    One ``parallel_correct_contigs`` job per input file is dispatched through
    joblib. Every job result is unpacked as
    ``(old_fpaths, corr_fpaths, broken_scaffold_fpaths, logs)``;
    ``corr_fpaths`` and ``broken_scaffold_fpaths`` are iterated as
    ``(fpath, lengths)`` pairs.

    Side effects:
      * every path is registered in ``qconfig.assembly_labels_by_fpath``
        (broken-scaffold paths under ``label + '_broken'``);
      * ``add_lengths_to_report`` is called for each corrected and
        broken-scaffold file;
      * with plots or the HTML report enabled and an empty
        ``plotter.dict_color_and_ls``, ``plotter.save_colors_and_ls`` is
        called with the corrected paths.

    :param contigs_fpaths: paths of the assemblies to pre-process
    :param corrected_dirpath: directory passed through to the worker
    :param labels: labels, indexed in parallel with ``contigs_fpaths``
    :param reporting: passed through to ``add_lengths_to_report``
    :return: tuple ``(corrected_contigs_fpaths, old_contigs_fpaths)``;
        broken-scaffold paths appear in both lists
    """
    ## removing from contigs' names special characters because:
    ## 1) Some embedded tools can fail on some strings with "...", "+", "-", etc
    ## 2) Nucmer fails on names like "contig 1_bla_bla", "contig 2_bla_bla"
    ##    (it interprets as a contig's name only the first word of caption
    ##    and gets ambiguous contigs names)

    # an unset thread limit means "single job"; min(int, None) would raise
    # TypeError on Python 3. The global setting itself is left untouched.
    max_threads = 1 if qconfig.max_threads is None else qconfig.max_threads

    # joblib rejects n_jobs == 0 (empty input list); only that value is
    # replaced, so other values keep their joblib meaning
    n_jobs = min(len(contigs_fpaths), max_threads) or 1

    from joblib import Parallel, delayed

    logger.main_info(' Pre-processing...')
    jobs = (delayed(parallel_correct_contigs)(i, contigs_fpath, corrected_dirpath, labels)
            for i, contigs_fpath in enumerate(contigs_fpaths))
    corrected_info = Parallel(n_jobs=n_jobs)(jobs)

    corrected_contigs_fpaths = []
    old_contigs_fpaths = []

    # job results are consumed in input order; the index picks the label
    for contig_idx, job_result in enumerate(corrected_info):
        old_fpaths, corr_fpaths, broken_scaffold_fpaths, logs = job_result
        label = labels[contig_idx]
        logger.main_info('\n'.join(logs))

        for old_fpath in old_fpaths:
            old_contigs_fpaths.append(old_fpath)
            qconfig.assembly_labels_by_fpath[old_fpath] = label

        for corr_fpath, lengths in corr_fpaths:
            corrected_contigs_fpaths.append(corr_fpath)
            qconfig.assembly_labels_by_fpath[corr_fpath] = label
            add_lengths_to_report(lengths, reporting, corr_fpath)

        # broken-scaffold files count as both "old" and "corrected"
        for broken_fpath, lengths in broken_scaffold_fpaths:
            old_contigs_fpaths.append(broken_fpath)
            corrected_contigs_fpaths.append(broken_fpath)
            qconfig.assembly_labels_by_fpath[broken_fpath] = label + '_broken'
            add_lengths_to_report(lengths, reporting, broken_fpath)

    if qconfig.draw_plots or qconfig.html_report:
        from quast_libs import plotter
        # keep colors/line styles that were assigned earlier
        if not plotter.dict_color_and_ls:
            plotter.save_colors_and_ls(corrected_contigs_fpaths)

    return corrected_contigs_fpaths, old_contigs_fpaths