def main(args): check_dirpath( qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n.' + 'Please, put QUAST in a different directory, then try again.\n', exit_code=3) if not args: qconfig.usage(stream=sys.stderr) sys.exit(1) try: import imp imp.reload(qconfig) imp.reload(qutils) except: reload(qconfig) reload(qutils) try: locale.setlocale(locale.LC_ALL, 'en_US.utf8') except Exception: try: locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') except Exception: logger.warning('Python locale settings can\'t be changed') quast_path = [os.path.realpath(__file__)] quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args) output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname) logger.main_info() logger.print_params() ######################################################################## from quast_libs import reporting reports = reporting.reports try: import imp imp.reload(reporting) except: reload(reporting) reporting.reports = reports reporting.assembly_fpaths = [] from quast_libs import plotter # Do not remove this line! It would lead to a warning in matplotlib. if qconfig.is_combined_ref: corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname) else: if os.path.isdir(corrected_dirpath): shutil.rmtree(corrected_dirpath) os.mkdir(corrected_dirpath) qconfig.set_max_threads(logger) check_reads_fpaths(logger) # PROCESSING REFERENCE if ref_fpath: logger.main_info() logger.main_info('Reference:') original_ref_fpath = ref_fpath ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath) if qconfig.optimal_assembly: if not qconfig.pacbio_reads and not qconfig.nanopore_reads and not qconfig.mate_pairs: logger.warning( 'Optimal assembly cannot be created. It requires mate-pairs or long reads (Pacbio SMRT or Oxford Nanopore).' ) else: optimal_assembly_fpath = optimal_assembly.do( ref_fpath, original_ref_fpath, os.path.join(output_dirpath, qconfig.optimal_assembly_basename)) if optimal_assembly_fpath is not None: contigs_fpaths.insert(0, optimal_assembly_fpath) labels.insert(0, 'Optimal') labels = qutils.process_labels(contigs_fpaths, labels) else: ref_fpath = '' # PROCESSING CONTIGS logger.main_info() logger.main_info('Contigs:') contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs( contigs_fpaths, corrected_dirpath, labels, reporting) for contigs_fpath in contigs_fpaths: report = reporting.get(contigs_fpath) report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath)) qconfig.assemblies_num = len(contigs_fpaths) cov_fpath = qconfig.cov_fpath physical_cov_fpath = qconfig.phys_cov_fpath if qconfig.reads_fpaths or qconfig.reference_sam or qconfig.reference_sam or qconfig.sam_fpaths or qconfig.bam_fpaths: bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do( ref_fpath, contigs_fpaths, os.path.join(output_dirpath, qconfig.reads_stats_dirname), external_logger=logger) qconfig.bed = bed_fpath if not contigs_fpaths: logger.error( "None of the assembly files contains correct contigs. " "Please, provide different files or decrease --min-contig threshold.", fake_if_nested_run=True) return 4 if qconfig.used_colors and qconfig.used_ls: for i, label in enumerate(labels): plotter_data.dict_color_and_ls[label] = (qconfig.used_colors[i], qconfig.used_ls[i]) qconfig.assemblies_fpaths = contigs_fpaths # Where all pdfs will be saved all_pdf_fpath = None if qconfig.draw_plots and plotter.can_draw_plots: all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname) if qconfig.json_output_dirpath: from quast_libs.html_saver import json_saver if json_saver.simplejson_error: qconfig.json_output_dirpath = None ######################################################################## ### Stats and plots ######################################################################## from quast_libs import basic_stats icarus_gc_fpath, circos_gc_fpath = basic_stats.do( ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'), output_dirpath) if qconfig.large_genome and ref_fpath: unique_kmers.do(os.path.join(output_dirpath, 'basic_stats'), ref_fpath, contigs_fpaths, logger) aligned_contigs_fpaths = [] aligned_lengths_lists = [] contig_alignment_plot_fpath = None icarus_html_fpath = None circos_png_fpath = None if ref_fpath: ######################################################################## ### former PLANTAKOLYA, PLANTAGORA ######################################################################## from quast_libs import contigs_analyzer is_cyclic = qconfig.prokaryote and not qconfig.check_for_fragmented_ref aligner_statuses, aligned_lengths_per_fpath = contigs_analyzer.do( ref_fpath, contigs_fpaths, is_cyclic, os.path.join(output_dirpath, 'contigs_reports'), old_contigs_fpaths, qconfig.bed) for contigs_fpath in contigs_fpaths: if aligner_statuses[ contigs_fpath] == contigs_analyzer.AlignerStatus.OK: aligned_contigs_fpaths.append(contigs_fpath) aligned_lengths_lists.append( aligned_lengths_per_fpath[contigs_fpath]) # Before continue evaluating, check if aligner didn't skip all of the contigs files. detailed_contigs_reports_dirpath = None features_containers = None if len(aligned_contigs_fpaths) and ref_fpath: detailed_contigs_reports_dirpath = os.path.join( output_dirpath, 'contigs_reports') ######################################################################## ### NAx and NGAx ("aligned Nx and NGx") ######################################################################## from quast_libs import aligned_stats aligned_stats.do(ref_fpath, aligned_contigs_fpaths, output_dirpath, aligned_lengths_lists, os.path.join(output_dirpath, 'aligned_stats')) ######################################################################## ### GENOME_ANALYZER ######################################################################## from quast_libs import genome_analyzer features_containers = genome_analyzer.do( ref_fpath, aligned_contigs_fpaths, output_dirpath, qconfig.features, qconfig.operons, detailed_contigs_reports_dirpath, os.path.join(output_dirpath, 'genome_stats')) genes_by_labels = None if qconfig.gene_finding: if qconfig.glimmer: ######################################################################## ### Glimmer ######################################################################## from quast_libs import glimmer genes_by_labels = glimmer.do( contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes')) if not qconfig.glimmer or qconfig.test: ######################################################################## ### GeneMark ######################################################################## from quast_libs import genemark genes_by_labels = genemark.do( contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'), qconfig.prokaryote, qconfig.metagenemark) else: logger.main_info("") logger.notice( "Genes are not predicted by default. Use --gene-finding option to enable it." ) if qconfig.rna_gene_finding: run_barrnap.do(contigs_fpaths, os.path.join(output_dirpath, 'predicted_genes'), logger) if qconfig.run_busco and not qconfig.is_combined_ref: if qconfig.platform_name == 'macosx': logger.main_info("") logger.warning("BUSCO can be run on Linux only") elif sys.version[0:3] == '2.5': logger.main_info("") logger.warning( "BUSCO does not support Python versions older than 2.6.") else: from quast_libs import run_busco run_busco.do(contigs_fpaths, os.path.join(output_dirpath, qconfig.busco_dirname), logger) ######################################################################## reports_fpaths, transposed_reports_fpaths = reporting.save_total( output_dirpath) ######################################################################## ### LARGE DRAWING TASKS ######################################################################## if qconfig.draw_plots or qconfig.create_icarus_html: logger.print_timestamp() logger.main_info('Creating large visual summaries...') logger.main_info( 'This may take a while: press Ctrl-C to skip this step..') try: if detailed_contigs_reports_dirpath: report_for_icarus_fpath_pattern = os.path.join( detailed_contigs_reports_dirpath, qconfig.icarus_report_fname_pattern) stdout_pattern = os.path.join( detailed_contigs_reports_dirpath, qconfig.contig_report_fname_pattern) else: report_for_icarus_fpath_pattern = None stdout_pattern = None draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html draw_circos_plot = qconfig.draw_plots and ref_fpath and len( aligned_contigs_fpaths) and not qconfig.space_efficient number_of_steps = sum([ int(bool(value)) for value in [draw_alignment_plots, draw_circos_plot, all_pdf_fpath] ]) if draw_alignment_plots: ######################################################################## ### VISUALIZE CONTIG ALIGNMENT ######################################################################## logger.main_info(' 1 of %d: Creating Icarus viewers...' % number_of_steps) from quast_libs import icarus icarus_html_fpath, contig_alignment_plot_fpath = icarus.do( contigs_fpaths, report_for_icarus_fpath_pattern, output_dirpath, ref_fpath, stdout_pattern=stdout_pattern, features=features_containers, cov_fpath=cov_fpath, physical_cov_fpath=physical_cov_fpath, gc_fpath=icarus_gc_fpath, json_output_dir=qconfig.json_output_dirpath, genes_by_labels=genes_by_labels) if draw_circos_plot: logger.main_info( ' %d of %d: Creating Circos plots...' % (2 if draw_alignment_plots else 1, number_of_steps)) from quast_libs import circos circos_png_fpath, circos_legend_fpath = circos.do( ref_fpath, contigs_fpaths, report_for_icarus_fpath_pattern, circos_gc_fpath, features_containers, cov_fpath, os.path.join(output_dirpath, 'circos'), logger) if all_pdf_fpath: # full report in PDF format: all tables and plots logger.main_info( ' %d of %d: Creating PDF with all tables and plots...' % (number_of_steps, number_of_steps)) plotter.fill_all_pdf_file(all_pdf_fpath) logger.main_info('Done') except KeyboardInterrupt: logger.main_info('..step skipped!') if all_pdf_fpath and os.path.isfile(all_pdf_fpath): os.remove(all_pdf_fpath) ######################################################################## ### TOTAL REPORT ######################################################################## logger.print_timestamp() logger.main_info('RESULTS:') logger.main_info(' Text versions of total report are saved to ' + reports_fpaths) logger.main_info( ' Text versions of transposed total report are saved to ' + transposed_reports_fpaths) if qconfig.html_report: from quast_libs.html_saver import html_saver html_saver.save_colors(output_dirpath, contigs_fpaths, plotter_data.dict_color_and_ls) html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath) if all_pdf_fpath and os.path.isfile(all_pdf_fpath): logger.main_info(' PDF version (tables and plots) is saved to ' + all_pdf_fpath) if circos_png_fpath: logger.main_info( ' Circos plot is saved to %s (the annotation is in %s). Circos configuration file is saved to %s' % (circos_png_fpath, circos_legend_fpath, circos_png_fpath.replace('.png', '.conf'))) if icarus_html_fpath: logger.main_info(' Icarus (contig browser) is saved to %s' % icarus_html_fpath) if qconfig.draw_svg and contig_alignment_plot_fpath: logger.main_info(' Contig alignment plot is saved to %s' % contig_alignment_plot_fpath) cleanup(corrected_dirpath) return logger.finish_up(check_test=qconfig.test)
def main(args): check_dirpath(qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n.' + 'Please, put QUAST in a different directory, then try again.\n', exit_code=3) if not args: qconfig.usage(stream=sys.stderr) sys.exit(1) try: import imp imp.reload(qconfig) imp.reload(qutils) except: reload(qconfig) reload(qutils) try: locale.setlocale(locale.LC_ALL, 'en_US.utf8') except Exception: try: locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') except Exception: logger.warning('Python locale settings can\'t be changed') quast_path = [os.path.realpath(__file__)] quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args) output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname) logger.main_info() logger.print_params() ######################################################################## from quast_libs import reporting reports = reporting.reports try: import imp imp.reload(reporting) except: reload(reporting) reporting.reports = reports reporting.assembly_fpaths = [] from quast_libs import plotter # Do not remove this line! It would lead to a warning in matplotlib. if qconfig.is_combined_ref: corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname) else: if os.path.isdir(corrected_dirpath): shutil.rmtree(corrected_dirpath) os.mkdir(corrected_dirpath) qconfig.set_max_threads(logger) check_reads_fpaths(logger) # PROCESSING REFERENCE if ref_fpath: logger.main_info() logger.main_info('Reference:') original_ref_fpath = ref_fpath ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath) if qconfig.ideal_assembly: ideal_assembly_fpath = ideal_assembly.do(ref_fpath, original_ref_fpath, os.path.join(output_dirpath, qconfig.ideal_assembly_basename)) if ideal_assembly_fpath is not None: contigs_fpaths.insert(0, ideal_assembly_fpath) labels.insert(0, 'IDEAL ASSEMBLY') labels = qutils.process_labels(contigs_fpaths, labels) else: ref_fpath = '' # PROCESSING CONTIGS logger.main_info() logger.main_info('Contigs:') contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting) for contigs_fpath in contigs_fpaths: report = reporting.get(contigs_fpath) report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath)) qconfig.assemblies_num = len(contigs_fpaths) cov_fpath = qconfig.cov_fpath physical_cov_fpath = qconfig.phys_cov_fpath if qconfig.reads_fpaths or qconfig.reference_sam or qconfig.reference_sam or qconfig.sam_fpaths or qconfig.bam_fpaths: bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths, os.path.join(output_dirpath, qconfig.reads_stats_dirname), external_logger=logger) qconfig.bed = bed_fpath if not contigs_fpaths: logger.error("None of the assembly files contains correct contigs. " "Please, provide different files or decrease --min-contig threshold.", fake_if_nested_run=True) return 4 if qconfig.used_colors and qconfig.used_ls: for i, label in enumerate(labels): plotter_data.dict_color_and_ls[label] = (qconfig.used_colors[i], qconfig.used_ls[i]) qconfig.assemblies_fpaths = contigs_fpaths # Where all pdfs will be saved all_pdf_fpath = None if qconfig.draw_plots and plotter.can_draw_plots: all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname) if qconfig.json_output_dirpath: from quast_libs.html_saver import json_saver if json_saver.simplejson_error: qconfig.json_output_dirpath = None ######################################################################## ### Stats and plots ######################################################################## from quast_libs import basic_stats icarus_gc_fpath, circos_gc_fpath = basic_stats.do(ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'), output_dirpath) if qconfig.large_genome and ref_fpath: unique_kmers.do(os.path.join(output_dirpath, 'basic_stats'), ref_fpath, contigs_fpaths, logger) aligned_contigs_fpaths = [] aligned_lengths_lists = [] contig_alignment_plot_fpath = None icarus_html_fpath = None circos_png_fpath = None if ref_fpath: ######################################################################## ### former PLANTAKOLYA, PLANTAGORA ######################################################################## from quast_libs import contigs_analyzer is_cyclic = qconfig.prokaryote and not qconfig.check_for_fragmented_ref nucmer_statuses, aligned_lengths_per_fpath = contigs_analyzer.do( ref_fpath, contigs_fpaths, is_cyclic, os.path.join(output_dirpath, 'contigs_reports'), old_contigs_fpaths, qconfig.bed) for contigs_fpath in contigs_fpaths: if nucmer_statuses[contigs_fpath] == contigs_analyzer.NucmerStatus.OK: aligned_contigs_fpaths.append(contigs_fpath) aligned_lengths_lists.append(aligned_lengths_per_fpath[contigs_fpath]) # Before continue evaluating, check if nucmer didn't skip all of the contigs files. detailed_contigs_reports_dirpath = None features_containers = None if len(aligned_contigs_fpaths) and ref_fpath: detailed_contigs_reports_dirpath = os.path.join(output_dirpath, 'contigs_reports') ######################################################################## ### NAx and NGAx ("aligned Nx and NGx") ######################################################################## from quast_libs import aligned_stats aligned_stats.do( ref_fpath, aligned_contigs_fpaths, output_dirpath, aligned_lengths_lists, os.path.join(output_dirpath, 'aligned_stats')) ######################################################################## ### GENOME_ANALYZER ######################################################################## from quast_libs import genome_analyzer features_containers = genome_analyzer.do( ref_fpath, aligned_contigs_fpaths, output_dirpath, qconfig.genes, qconfig.operons, detailed_contigs_reports_dirpath, os.path.join(output_dirpath, 'genome_stats')) genes_by_labels = None if qconfig.gene_finding: if qconfig.glimmer: ######################################################################## ### Glimmer ######################################################################## from quast_libs import glimmer genes_by_labels = glimmer.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes')) if not qconfig.glimmer or qconfig.test: ######################################################################## ### GeneMark ######################################################################## from quast_libs import genemark genes_by_labels = genemark.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'), qconfig.prokaryote, qconfig.metagenemark) else: logger.main_info("") logger.notice("Genes are not predicted by default. Use --gene-finding option to enable it.") if qconfig.rna_gene_finding: run_barrnap.do(contigs_fpaths, os.path.join(output_dirpath, 'predicted_genes'), logger) if qconfig.run_busco and not qconfig.is_combined_ref: if qconfig.platform_name == 'macosx': logger.main_info("") logger.warning("BUSCO can be run on Linux only") elif sys.version[0:3] == '2.5': logger.main_info("") logger.warning("BUSCO does not support Python versions older than 2.6.") else: from quast_libs import run_busco run_busco.do(contigs_fpaths, os.path.join(output_dirpath, qconfig.busco_dirname), logger) ######################################################################## reports_fpaths, transposed_reports_fpaths = reporting.save_total(output_dirpath) ######################################################################## ### LARGE DRAWING TASKS ######################################################################## if qconfig.draw_plots or qconfig.create_icarus_html: logger.print_timestamp() logger.main_info('Creating large visual summaries...') logger.main_info('This may take a while: press Ctrl-C to skip this step..') try: if detailed_contigs_reports_dirpath: report_for_icarus_fpath_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.icarus_report_fname_pattern) stdout_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.contig_report_fname_pattern) else: report_for_icarus_fpath_pattern = None stdout_pattern = None draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html draw_circos_plot = qconfig.draw_plots and ref_fpath and len(aligned_contigs_fpaths) and not qconfig.space_efficient number_of_steps = sum([int(bool(value)) for value in [draw_alignment_plots, draw_circos_plot, all_pdf_fpath]]) if draw_alignment_plots: ######################################################################## ### VISUALIZE CONTIG ALIGNMENT ######################################################################## logger.main_info(' 1 of %d: Creating Icarus viewers...' % number_of_steps) from quast_libs import icarus icarus_html_fpath, contig_alignment_plot_fpath = icarus.do( contigs_fpaths, report_for_icarus_fpath_pattern, output_dirpath, ref_fpath, stdout_pattern=stdout_pattern, features=features_containers, cov_fpath=cov_fpath, physical_cov_fpath=physical_cov_fpath, gc_fpath=icarus_gc_fpath, json_output_dir=qconfig.json_output_dirpath, genes_by_labels=genes_by_labels) if draw_circos_plot: logger.main_info(' %d of %d: Creating Circos plots...' % (2 if draw_alignment_plots else 1, number_of_steps)) from quast_libs import circos circos_png_fpath, circos_legend_fpath = circos.do(ref_fpath, contigs_fpaths, report_for_icarus_fpath_pattern, circos_gc_fpath, features_containers, cov_fpath, os.path.join(output_dirpath, 'circos'), logger) if all_pdf_fpath: # full report in PDF format: all tables and plots logger.main_info(' %d of %d: Creating PDF with all tables and plots...' % (number_of_steps, number_of_steps)) plotter.fill_all_pdf_file(all_pdf_fpath) logger.main_info('Done') except KeyboardInterrupt: logger.main_info('..step skipped!') if all_pdf_fpath and os.path.isfile(all_pdf_fpath): os.remove(all_pdf_fpath) ######################################################################## ### TOTAL REPORT ######################################################################## logger.print_timestamp() logger.main_info('RESULTS:') logger.main_info(' Text versions of total report are saved to ' + reports_fpaths) logger.main_info(' Text versions of transposed total report are saved to ' + transposed_reports_fpaths) if qconfig.html_report: from quast_libs.html_saver import html_saver html_saver.save_colors(output_dirpath, contigs_fpaths, plotter_data.dict_color_and_ls) html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath) if all_pdf_fpath and os.path.isfile(all_pdf_fpath): logger.main_info(' PDF version (tables and plots) is saved to ' + all_pdf_fpath) if circos_png_fpath: logger.main_info(' Circos plot is saved to %s (the annotation is in %s). Circos configuration file is saved to %s' % (circos_png_fpath, circos_legend_fpath, circos_png_fpath.replace('.png', '.conf'))) if icarus_html_fpath: logger.main_info(' Icarus (contig browser) is saved to %s' % icarus_html_fpath) if qconfig.draw_svg and contig_alignment_plot_fpath: logger.main_info(' Contig alignment plot is saved to %s' % contig_alignment_plot_fpath) cleanup(corrected_dirpath) return logger.finish_up(check_test=qconfig.test)