def do(reference, contigs_fpaths, is_cyclic, output_dir, old_contigs_fpaths, bed_fpath=None): if not os.path.isdir(output_dir): os.mkdir(output_dir) logger.print_timestamp() logger.main_info('Running Contig analyzer...') success_compilation = compile_aligner(logger) if not success_compilation: logger.main_info( 'Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.' ) return dict( zip(contigs_fpaths, [AlignerStatus.FAILED] * len(contigs_fpaths))), None num_nf_errors = logger._num_nf_errors create_minimap_output_dir(output_dir) n_jobs = min(len(contigs_fpaths), qconfig.max_threads) threads = max(1, qconfig.max_threads // n_jobs) if is_python2(): from joblib2 import Parallel, delayed else: from joblib3 import Parallel, delayed genome_size, reference_chromosomes, ns_by_chromosomes = get_genome_stats( reference, skip_ns=True) threads = qconfig.max_threads if qconfig.memory_efficient else threads args = [(is_cyclic, i, contigs_fpath, output_dir, reference, reference_chromosomes, ns_by_chromosomes, old_contigs_fpath, bed_fpath, threads) for i, (contigs_fpath, old_contigs_fpath ) in enumerate(zip(contigs_fpaths, old_contigs_fpaths))] statuses, results, aligned_lengths, misassemblies_in_contigs, aligned_lengths_by_contigs = run_parallel( align_and_analyze, args, n_jobs) reports = [] aligner_statuses = dict(zip(contigs_fpaths, statuses)) aligned_lengths_per_fpath = dict(zip(contigs_fpaths, aligned_lengths)) misc.contigs_aligned_lengths = dict( zip(contigs_fpaths, aligned_lengths_by_contigs)) if AlignerStatus.OK in aligner_statuses.values(): if qconfig.is_combined_ref: save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger) for index, fname in enumerate(contigs_fpaths): report = reporting.get(fname) if statuses[index] == AlignerStatus.OK: reports.append( save_result(results[index], report, fname, reference, genome_size)) elif statuses[index] == AlignerStatus.NOT_ALIGNED: save_result_for_unaligned(results[index], report) if AlignerStatus.OK in aligner_statuses.values(): reporting.save_misassemblies(output_dir) reporting.save_unaligned(output_dir) from . import plotter if qconfig.draw_plots: plotter.draw_misassemblies_plot( reports, join(output_dir, 'misassemblies_plot'), 'Misassemblies') if qconfig.draw_plots or qconfig.html_report: misassemblies_in_contigs = dict( (contigs_fpaths[i], misassemblies_in_contigs[i]) for i in range(len(contigs_fpaths))) plotter.frc_plot(dirname(output_dir), reference, contigs_fpaths, misc.contigs_aligned_lengths, misassemblies_in_contigs, join(output_dir, 'misassemblies_frcurve_plot'), 'misassemblies') oks = list(aligner_statuses.values()).count(AlignerStatus.OK) not_aligned = list(aligner_statuses.values()).count( AlignerStatus.NOT_ALIGNED) failed = list(aligner_statuses.values()).count(AlignerStatus.FAILED) errors = list(aligner_statuses.values()).count(AlignerStatus.ERROR) problems = not_aligned + failed + errors all = len(aligner_statuses) logger._num_nf_errors = num_nf_errors + errors if oks == all: logger.main_info('Done.') if oks < all and problems < all: logger.main_info( 'Done for ' + str(all - problems) + ' out of ' + str(all) + '. For the rest, only basic stats are going to be evaluated.') if problems == all: logger.main_info( 'Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.' ) return aligner_statuses, aligned_lengths_per_fpath
def do(reference, contigs_fpaths, is_cyclic, output_dir, old_contigs_fpaths, bed_fpath=None): if not os.path.isdir(output_dir): os.mkdir(output_dir) logger.print_timestamp() logger.main_info('Running Contig analyzer...') success_compilation = compile_aligner(logger) if not success_compilation: logger.main_info('Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.') return dict(zip(contigs_fpaths, [NucmerStatus.FAILED] * len(contigs_fpaths))), None if qconfig.draw_plots: compile_gnuplot(logger, only_clean=False) num_nf_errors = logger._num_nf_errors create_nucmer_output_dir(output_dir) n_jobs = min(len(contigs_fpaths), qconfig.max_threads) threads = max(1, qconfig.max_threads // n_jobs) if is_python2(): from joblib import Parallel, delayed else: from joblib3 import Parallel, delayed if not qconfig.splitted_ref and not qconfig.memory_efficient: statuses_results_lengths_tuples = Parallel(n_jobs=n_jobs)(delayed(align_and_analyze)( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, threads=threads) for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths))) else: if len(contigs_fpaths) >= len(qconfig.splitted_ref) and not qconfig.memory_efficient: statuses_results_lengths_tuples = Parallel(n_jobs=n_jobs)(delayed(align_and_analyze)( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, threads=threads) for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths))) else: statuses_results_lengths_tuples = [] for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths)): statuses_results_lengths_tuples.append(align_and_analyze( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, parallel_by_chr=True, threads=qconfig.max_threads)) # unzipping statuses, results, aligned_lengths, misassemblies_in_contigs, aligned_lengths_by_contigs =\ [[x[i] for x in statuses_results_lengths_tuples] for i in range(5)] reports = [] nucmer_statuses = dict(zip(contigs_fpaths, statuses)) aligned_lengths_per_fpath = dict(zip(contigs_fpaths, aligned_lengths)) misc.contigs_aligned_lengths = dict(zip(contigs_fpaths, aligned_lengths_by_contigs)) if NucmerStatus.OK in nucmer_statuses.values(): if qconfig.is_combined_ref: save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger) for index, fname in enumerate(contigs_fpaths): report = reporting.get(fname) if statuses[index] == NucmerStatus.OK: reports.append(save_result(results[index], report, fname, reference)) elif statuses[index] == NucmerStatus.NOT_ALIGNED: save_result_for_unaligned(results[index], report) if NucmerStatus.OK in nucmer_statuses.values(): reporting.save_misassemblies(output_dir) reporting.save_unaligned(output_dir) from . import plotter if qconfig.draw_plots: plotter.draw_misassemblies_plot(reports, join(output_dir, 'misassemblies_plot'), 'Misassemblies') if qconfig.draw_plots or qconfig.html_report: misassemblies_in_contigs = dict((contigs_fpaths[i], misassemblies_in_contigs[i]) for i in range(len(contigs_fpaths))) plotter.frc_plot(dirname(output_dir), reference, contigs_fpaths, misc.contigs_aligned_lengths, misassemblies_in_contigs, join(output_dir, 'misassemblies_frcurve_plot'), 'misassemblies') oks = list(nucmer_statuses.values()).count(NucmerStatus.OK) not_aligned = list(nucmer_statuses.values()).count(NucmerStatus.NOT_ALIGNED) failed = list(nucmer_statuses.values()).count(NucmerStatus.FAILED) errors = list(nucmer_statuses.values()).count(NucmerStatus.ERROR) problems = not_aligned + failed + errors all = len(nucmer_statuses) logger._num_nf_errors = num_nf_errors + errors if oks == all: logger.main_info('Done.') if oks < all and problems < all: logger.main_info('Done for ' + str(all - problems) + ' out of ' + str(all) + '. For the rest, only basic stats are going to be evaluated.') if problems == all: logger.main_info('Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.') return nucmer_statuses, aligned_lengths_per_fpath
def do(reference, contigs_fpaths, is_cyclic, output_dir, old_contigs_fpaths, bed_fpath=None): if not os.path.isdir(output_dir): os.mkdir(output_dir) logger.print_timestamp() logger.main_info('Running Contig analyzer...') num_nf_errors = logger._num_nf_errors success_compilation = compile_aligner(logger) if qconfig.test and is_emem_aligner(): success_compilation = check_emem_functionality(logger) if not success_compilation: logger.main_info('Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.') return dict(zip(contigs_fpaths, [NucmerStatus.FAILED] * len(contigs_fpaths))), None create_nucmer_output_dir(output_dir) n_jobs = min(len(contigs_fpaths), qconfig.max_threads) if qconfig.memory_efficient: threads = 1 else: threads = max(1, qconfig.max_threads // n_jobs) if is_python2(): from joblib import Parallel, delayed else: from joblib3 import Parallel, delayed if not qconfig.splitted_ref: statuses_results_lengths_tuples = Parallel(n_jobs=n_jobs)(delayed(align_and_analyze)( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, threads=threads) for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths))) else: if len(contigs_fpaths) >= len(qconfig.splitted_ref) and not qconfig.memory_efficient: statuses_results_lengths_tuples = Parallel(n_jobs=n_jobs)(delayed(align_and_analyze)( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, threads=threads) for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths))) else: statuses_results_lengths_tuples = [] for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths)): statuses_results_lengths_tuples.append(align_and_analyze( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, parallel_by_chr=True, threads=qconfig.max_threads)) # unzipping statuses, results, aligned_lengths = [x[0] for x in statuses_results_lengths_tuples], \ [x[1] for x in statuses_results_lengths_tuples], \ [x[2] for x in statuses_results_lengths_tuples] reports = [] for index, fname in enumerate(contigs_fpaths): report = reporting.get(fname) if statuses[index] == NucmerStatus.OK: reports.append(save_result(results[index], report, fname)) elif statuses[index] == NucmerStatus.NOT_ALIGNED: save_result_for_unaligned(results[index], report) nucmer_statuses = dict(zip(contigs_fpaths, statuses)) aligned_lengths_per_fpath = dict(zip(contigs_fpaths, aligned_lengths)) if NucmerStatus.OK in nucmer_statuses.values(): reporting.save_misassemblies(output_dir) reporting.save_unaligned(output_dir) if qconfig.draw_plots: from . import plotter plotter.draw_misassembl_plot(reports, join(output_dir, 'misassemblies_plot'), 'Misassemblies') if qconfig.is_combined_ref: save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger) oks = list(nucmer_statuses.values()).count(NucmerStatus.OK) not_aligned = list(nucmer_statuses.values()).count(NucmerStatus.NOT_ALIGNED) failed = list(nucmer_statuses.values()).count(NucmerStatus.FAILED) errors = list(nucmer_statuses.values()).count(NucmerStatus.ERROR) problems = not_aligned + failed + errors all = len(nucmer_statuses) logger._num_nf_errors = num_nf_errors + errors if oks == all: logger.main_info('Done.') if oks < all and problems < all: logger.main_info('Done for ' + str(all - problems) + ' out of ' + str(all) + '. For the rest, only basic stats are going to be evaluated.') if problems == all: logger.main_info('Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.') if not qconfig.test and is_emem_aligner(): logger.warning('Please rerun QUAST using --test option to ensure that E-MEM aligner works properly.') return nucmer_statuses, aligned_lengths_per_fpath
def do(reference, contigs_fpaths, is_cyclic, output_dir, old_contigs_fpaths, bed_fpath=None): if not os.path.isdir(output_dir): os.mkdir(output_dir) logger.print_timestamp() logger.main_info('Running Contig analyzer...') success_compilation = compile_aligner(logger) if qconfig.test and is_emem_aligner(): success_compilation = check_emem_functionality(logger) if not success_compilation: logger.main_info('Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.') return dict(zip(contigs_fpaths, [NucmerStatus.FAILED] * len(contigs_fpaths))), None if qconfig.draw_plots: compile_gnuplot(logger, only_clean=False) num_nf_errors = logger._num_nf_errors create_nucmer_output_dir(output_dir) n_jobs = min(len(contigs_fpaths), qconfig.max_threads) if qconfig.memory_efficient: threads = 1 else: threads = max(1, qconfig.max_threads // n_jobs) if is_python2(): from joblib import Parallel, delayed else: from joblib3 import Parallel, delayed if not qconfig.splitted_ref and not qconfig.memory_efficient: statuses_results_lengths_tuples = Parallel(n_jobs=n_jobs)(delayed(align_and_analyze)( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, threads=threads) for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths))) else: if len(contigs_fpaths) >= len(qconfig.splitted_ref) and not qconfig.memory_efficient: statuses_results_lengths_tuples = Parallel(n_jobs=n_jobs)(delayed(align_and_analyze)( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, threads=threads) for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths))) else: statuses_results_lengths_tuples = [] for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths)): statuses_results_lengths_tuples.append(align_and_analyze( is_cyclic, i, contigs_fpath, output_dir, reference, old_contigs_fpath, bed_fpath, parallel_by_chr=True, threads=qconfig.max_threads)) # unzipping statuses, results, aligned_lengths, misassemblies_in_contigs, aligned_lengths_by_contigs =\ [[x[i] for x in statuses_results_lengths_tuples] for i in range(5)] reports = [] nucmer_statuses = dict(zip(contigs_fpaths, statuses)) aligned_lengths_per_fpath = dict(zip(contigs_fpaths, aligned_lengths)) misc.contigs_aligned_lengths = dict(zip(contigs_fpaths, aligned_lengths_by_contigs)) if NucmerStatus.OK in nucmer_statuses.values(): if qconfig.is_combined_ref: save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger) for index, fname in enumerate(contigs_fpaths): report = reporting.get(fname) if statuses[index] == NucmerStatus.OK: reports.append(save_result(results[index], report, fname, reference)) elif statuses[index] == NucmerStatus.NOT_ALIGNED: save_result_for_unaligned(results[index], report) if NucmerStatus.OK in nucmer_statuses.values(): reporting.save_misassemblies(output_dir) reporting.save_unaligned(output_dir) from . import plotter if qconfig.draw_plots: plotter.draw_misassemblies_plot(reports, join(output_dir, 'misassemblies_plot'), 'Misassemblies') if qconfig.draw_plots or qconfig.html_report: misassemblies_in_contigs = dict((contigs_fpaths[i], misassemblies_in_contigs[i]) for i in range(len(contigs_fpaths))) plotter.frc_plot(dirname(output_dir), reference, contigs_fpaths, misc.contigs_aligned_lengths, misassemblies_in_contigs, join(output_dir, 'misassemblies_frcurve_plot'), 'misassemblies') oks = list(nucmer_statuses.values()).count(NucmerStatus.OK) not_aligned = list(nucmer_statuses.values()).count(NucmerStatus.NOT_ALIGNED) failed = list(nucmer_statuses.values()).count(NucmerStatus.FAILED) errors = list(nucmer_statuses.values()).count(NucmerStatus.ERROR) problems = not_aligned + failed + errors all = len(nucmer_statuses) logger._num_nf_errors = num_nf_errors + errors if oks == all: logger.main_info('Done.') if oks < all and problems < all: logger.main_info('Done for ' + str(all - problems) + ' out of ' + str(all) + '. For the rest, only basic stats are going to be evaluated.') if problems == all: logger.main_info('Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.') if not qconfig.test and is_emem_aligner(): logger.warning('Please rerun QUAST using --test option to ensure that E-MEM aligner works properly.') return nucmer_statuses, aligned_lengths_per_fpath
def do(reference, contigs_fpaths, is_cyclic, output_dir, old_contigs_fpaths, bed_fpath=None): if not os.path.isdir(output_dir): os.mkdir(output_dir) logger.print_timestamp() logger.main_info('Running Contig analyzer...') success_compilation = compile_aligner(logger) if not success_compilation: logger.main_info('Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.') return dict(zip(contigs_fpaths, [AlignerStatus.FAILED] * len(contigs_fpaths))), None num_nf_errors = logger._num_nf_errors create_minimap_output_dir(output_dir) n_jobs = min(len(contigs_fpaths), qconfig.max_threads) threads = max(1, qconfig.max_threads // n_jobs) genome_size, reference_chromosomes, ns_by_chromosomes = get_genome_stats(reference, skip_ns=True) threads = qconfig.max_threads if qconfig.memory_efficient else threads args = [(is_cyclic, i, contigs_fpath, output_dir, reference, reference_chromosomes, ns_by_chromosomes, old_contigs_fpath, bed_fpath, threads) for i, (contigs_fpath, old_contigs_fpath) in enumerate(zip(contigs_fpaths, old_contigs_fpaths))] statuses, results, aligned_lengths, misassemblies_in_contigs, aligned_lengths_by_contigs = run_parallel(align_and_analyze, args, n_jobs) reports = [] aligner_statuses = dict(zip(contigs_fpaths, statuses)) aligned_lengths_per_fpath = dict(zip(contigs_fpaths, aligned_lengths)) misc.contigs_aligned_lengths = dict(zip(contigs_fpaths, aligned_lengths_by_contigs)) if AlignerStatus.OK in aligner_statuses.values(): if qconfig.is_combined_ref: save_combined_ref_stats(results, contigs_fpaths, ref_labels_by_chromosomes, output_dir, logger) for index, fname in enumerate(contigs_fpaths): report = reporting.get(fname) if statuses[index] == AlignerStatus.OK: reports.append(save_result(results[index], report, fname, reference, genome_size)) elif statuses[index] == AlignerStatus.NOT_ALIGNED: save_result_for_unaligned(results[index], report) if AlignerStatus.OK in aligner_statuses.values(): reporting.save_misassemblies(output_dir) reporting.save_unaligned(output_dir) from . import plotter if qconfig.draw_plots: plotter.draw_misassemblies_plot(reports, join(output_dir, 'misassemblies_plot'), 'Misassemblies') if qconfig.draw_plots or qconfig.html_report: misassemblies_in_contigs = dict((contigs_fpaths[i], misassemblies_in_contigs[i]) for i in range(len(contigs_fpaths))) plotter.frc_plot(dirname(output_dir), reference, contigs_fpaths, misc.contigs_aligned_lengths, misassemblies_in_contigs, join(output_dir, 'misassemblies_frcurve_plot'), 'misassemblies') oks = list(aligner_statuses.values()).count(AlignerStatus.OK) not_aligned = list(aligner_statuses.values()).count(AlignerStatus.NOT_ALIGNED) failed = list(aligner_statuses.values()).count(AlignerStatus.FAILED) errors = list(aligner_statuses.values()).count(AlignerStatus.ERROR) problems = not_aligned + failed + errors all = len(aligner_statuses) logger._num_nf_errors = num_nf_errors + errors if oks == all: logger.main_info('Done.') if oks < all and problems < all: logger.main_info('Done for ' + str(all - problems) + ' out of ' + str(all) + '. For the rest, only basic stats are going to be evaluated.') if problems == all: logger.main_info('Failed aligning the contigs for all the assemblies. Only basic stats are going to be evaluated.') return aligner_statuses, aligned_lengths_per_fpath