elif opt in ("-m", "--meta"): pass elif opt in ["--no-plots"]: pass elif opt in ["--no-html"]: pass else: logger.error('Unknown option: %s. Use -h for help.' % (opt + (' ' + arg) if arg else ''), to_stderr=True) sys.exit(2) for c_fpath in contigs_fpaths: assert_file_exists(c_fpath, 'contigs') labels = quast.process_labels(contigs_fpaths, labels, all_labels_from_dirs) for contigs_fpath in contigs_fpaths: quast_py_args.remove(contigs_fpath) # # Removing outout dir if exists # if output_dirpath: # 'output dir was specified with -o option' # if os.path.isdir(output_dirpath): # shutil.rmtree(output_dirpath) # Directories output_dirpath, _, _ = quast._set_up_output_dir(output_dirpath, None, make_latest_symlink, save_json=False, remove_old=True)
def main(args):
    """Run MetaQUAST for the given command-line arguments.

    The function parses ``args`` with ``getopt.gnu_getopt``, prepares the
    output and "corrected" directories, corrects the references and the
    contigs, and then starts quast.py several times through
    ``_start_quast_main``:

    * once for the combined reference,
    * once per reference for the contigs partitioned to that reference,
    * once for the contigs that were not aligned to any reference.

    When no reference is given, a single quast.py run is started instead
    and the process exits with code 0.

    :param args: command-line arguments without the program name.
    :return: 4 when ``_correct_contigs`` yields no assemblies, otherwise
        None.  Usage/help requests and option errors leave through
        ``sys.exit`` (or ``logger.error(..., exit_with_code=...)``).
    """
    # Options MetaQUAST itself does not interpret.  They are accepted here
    # and stay in quast_py_args, which is handed to every quast.py run.
    pass_through_opts = (
        '-T', '--threads',
        '-j', '--save-json',
        '-J', '--save-json-to',
        '-t', '--contig-thresholds',
        '-c', '--mincluster',
        '--est-ref-size',
        '-S', '--gene-thresholds',
        '-s', '--scaffolds',
        '--gage',
        '--debug',
        '-e', '--eukaryote',
        '-f', '--gene-finding',
        '-a', '--ambiguity-usage',
        '-u', '--use-all-alignments',
        '-n', '--strict-NA',
        '-m', '--meta',
        '--no-plots',
        '--no-html',
    )

    if ' ' in quast_dirpath:
        logger.error(
            'QUAST does not support spaces in paths. \n'
            'You are trying to run it from ' + str(quast_dirpath) + '\n'
            'Please, put QUAST in a different directory, then try again.\n',
            to_stderr=True,
            exit_with_code=3)

    if not args:
        qconfig.usage(meta=True)
        sys.exit(0)

    min_contig = qconfig.min_contig
    # Collected for completeness; nothing later in this function reads them.
    genes = []
    operons = []
    make_latest_symlink = True

    try:
        options, contigs_fpaths = getopt.gnu_getopt(
            args, qconfig.short_options, qconfig.long_options)
    except getopt.GetoptError:
        _, exc_value, _ = sys.exc_info()
        print >> sys.stderr, exc_value
        print >> sys.stderr
        qconfig.usage(meta=True)
        sys.exit(2)

    # Arguments forwarded to quast.py; MetaQUAST-only tokens are stripped
    # from this copy as they are recognised below.
    quast_py_args = args[:]
    test_mode = False

    # First pass: options that must take effect before anything else.
    # Iterate over a snapshot: the body removes from and appends to
    # `options`, and doing that to the list being iterated would skip the
    # option that follows each removed one.
    for opt, arg in options[:]:
        if opt in ('-d', '--debug'):
            options.remove((opt, arg))
            qconfig.debug = True
            logger.set_up_console_handler(debug=True)

        elif opt == '--test':
            options.remove((opt, arg))
            quast_py_args.remove(opt)
            options += [('-o', 'quast_test_output'),
                        ('-R', 'test_data/meta_ref_1.fasta,'
                               'test_data/meta_ref_2.fasta,'
                               'test_data/meta_ref_3.fasta')]
            contigs_fpaths += ['test_data/meta_contigs_1.fasta',
                               'test_data/meta_contigs_2.fasta']
            test_mode = True

        elif opt.startswith('--help'):
            qconfig.usage(opt == "--help-hidden", meta=True)
            sys.exit(0)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage(meta=True)
        sys.exit(2)

    ref_fpaths = []
    combined_ref_fpath = ''
    output_dirpath = None
    labels = None
    all_labels_from_dirs = False

    # Second pass: the remaining options.
    for opt, arg in options:
        if opt in ('-o', "--output-dir"):
            # Removing output dir arg in order to further
            # construct other quast calls from this options
            if opt in quast_py_args and arg in quast_py_args:
                quast_py_args.remove(opt)
                quast_py_args.remove(arg)
            output_dirpath = os.path.abspath(arg)
            make_latest_symlink = False

        elif opt in ('-G', "--genes"):
            assert_file_exists(arg, 'genes')
            # append, not +=: `list += str` would add single characters.
            genes.append(arg)

        elif opt in ('-O', "--operons"):
            assert_file_exists(arg, 'operons')
            operons.append(arg)

        elif opt in ('-R', "--reference"):
            # Removing reference args in order to further
            # construct quast calls from this args with other reference options
            if opt in quast_py_args and arg in quast_py_args:
                quast_py_args.remove(opt)
                quast_py_args.remove(arg)
            ref_fpaths = arg.split(',')
            for ref_fpath in ref_fpaths:
                assert_file_exists(ref_fpath, 'reference')

        elif opt in ('-M', "--min-contig"):
            min_contig = int(arg)

        elif opt in ('-l', '--labels'):
            quast_py_args.remove(opt)
            quast_py_args.remove(arg)
            labels = quast.parse_labels(arg, contigs_fpaths)

        elif opt == '-L':
            quast_py_args.remove(opt)
            all_labels_from_dirs = True

        elif opt in pass_through_opts:
            pass

        else:
            logger.error('Unknown option: %s. Use -h for help.'
                         % (opt + ' ' + arg),
                         to_stderr=True, exit_with_code=2)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    labels = quast.process_labels(contigs_fpaths, labels, all_labels_from_dirs)

    # The contigs are passed to quast.py as `assemblies`, not as arguments.
    for contigs_fpath in contigs_fpaths:
        if contigs_fpath in quast_py_args:
            quast_py_args.remove(contigs_fpath)

    # Directories
    output_dirpath, _, _ = quast._set_up_output_dir(
        output_dirpath, None, make_latest_symlink, save_json=False)
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
    combined_output_dirpath = os.path.join(output_dirpath,
                                           'combined_quast_output')

    logger.set_up_file_handler(output_dirpath)
    logger.print_command_line([os.path.realpath(__file__)] + args,
                              wrap_after=None)
    logger.start()

    ########################################################################

    from libs import reporting
    reload(reporting)

    # Always start from an empty "corrected" directory.
    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    common_ref_fasta_ext = ''
    if ref_fpaths:
        logger.info()
        logger.info('Reference(s):')
        ref_fpaths, common_ref_fasta_ext, combined_ref_fpath = \
            _correct_refrences(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.info()
    logger.info('Contigs:')
    assemblies = _correct_contigs(contigs_fpaths, corrected_dirpath,
                                  min_contig, labels)
    if not assemblies:
        logger.error(
            "None of the assembly files contains correct contigs. "
            "Please, provide different files or decrease --min-contig threshold.")
        return 4

    # Running QUAST(s)
    quast_py_args += ['--meta']

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.info()
        logger.notice('No references provided, starting quast.py with '
                      'MetaGeneMark gene finder')
        _start_quast_main(
            None,
            quast_py_args,
            assemblies=assemblies,
            output_dirpath=os.path.join(output_dirpath, 'quast_output'),
            exit_on_exception=True)
        # sys.exit rather than the bare `exit`, which only exists when the
        # `site` module has been loaded.
        sys.exit(0)

    # Running combined reference
    run_name = 'for the combined reference'
    logger.info()
    logger.info('Starting quast.py ' + run_name + '...')

    # (notices, warnings, non-fatal errors); threaded through every run.
    total_num_notices = 0
    total_num_warnings = 0
    total_num_nf_errors = 0
    total_num_notifications = (total_num_notices,
                               total_num_warnings,
                               total_num_nf_errors)

    return_code, total_num_notifications = _start_quast_main(
        run_name, quast_py_args,
        assemblies=assemblies,
        reference_fpath=combined_ref_fpath,
        output_dirpath=combined_output_dirpath,
        num_notifications_tuple=total_num_notifications)

    # Partitioning contigs into bins aligned to each reference
    assemblies_by_reference, not_aligned_assemblies = _partition_contigs(
        assemblies, ref_fpaths, corrected_dirpath,
        os.path.join(combined_output_dirpath, 'contigs_reports',
                     'alignments_%s.tsv'))

    for ref_name, ref_assemblies in assemblies_by_reference.iteritems():
        logger.info('')
        if not ref_assemblies:
            logger.info('No contigs were aligned to the reference ' + ref_name)
            continue

        run_name = 'for the contigs aligned to ' + ref_name
        logger.info('Starting quast.py ' + run_name)
        return_code, total_num_notifications = _start_quast_main(
            run_name, quast_py_args,
            assemblies=ref_assemblies,
            reference_fpath=os.path.join(corrected_dirpath, ref_name)
                            + common_ref_fasta_ext,
            output_dirpath=os.path.join(output_dirpath,
                                        ref_name + '_quast_output'),
            exit_on_exception=False,
            num_notifications_tuple=total_num_notifications)

    # Finally running for the contigs that has not been aligned to any reference
    run_name = 'for the contigs not aligned anywhere'
    logger.info()
    logger.info('Starting quast.py ' + run_name + '...')

    return_code, total_num_notifications = _start_quast_main(
        run_name, quast_py_args,
        assemblies=not_aligned_assemblies,
        output_dirpath=os.path.join(output_dirpath, 'not_aligned_quast_output'),
        exit_on_exception=False,
        num_notifications_tuple=total_num_notifications)

    if return_code not in [0, 4]:
        logger.error('Error running quast.py for the contigs not aligned anywhere')

    quast._cleanup(corrected_dirpath)

    logger.info('')
    logger.info('MetaQUAST finished.')
    logger.finish_up(numbers=tuple(total_num_notifications),
                     check_test=test_mode)
def main(args):
    """Run MetaQUAST for the given command-line arguments.

    Steps performed here, in order:

    1. Parse ``args`` with ``getopt.gnu_getopt`` and split them into
       options MetaQUAST handles itself and ``quast_py_args`` that are
       forwarded to every quast.py run.
    2. Set up the output directory and an empty "corrected" directory,
       then correct the references (if any) and the contigs.
    3. When no reference was given and ``qconfig.max_references`` is not 0,
       ask ``search_references_meta.do`` for references.  If there are
       still none, start one plain quast.py run and exit with code 0.
    4. Run quast.py for the combined reference; for downloaded references,
       drop the ones rejected by ``remove_unaligned_downloaded_refs`` and
       re-run on the filtered combined reference.
    5. Partition the contigs by reference, run quast.py per reference and
       for the contigs not aligned anywhere, then build the summary (and
       the HTML meta report when enabled).

    :param args: command-line arguments without the program name.  The
        list itself is not modified.
    :return: 4 when ``_correct_contigs`` yields no assemblies, otherwise
        None.  Help/version/usage requests and option errors leave through
        ``sys.exit`` (or ``logger.error(..., exit_with_code=...)``).
    """
    # Options MetaQUAST itself does not interpret.  They are accepted here
    # and stay in quast_py_args, which is handed to the quast.py runs.
    pass_through_opts = (
        '-j', '--save-json',
        '-J', '--save-json-to',
        '--contig-thresholds',
        '-c', '--mincluster', '--min-cluster',
        '--est-ref-size',
        '--gene-thresholds',
        '-s', '--scaffolds',
        '--gage',
        '--debug',
        '-e', '--eukaryote',
        '-f', '--gene-finding',
        '-i', '--min-alignment',
        '-a', '--ambiguity-usage',
        '-u', '--use-all-alignments',
        '--strict-NA',
        '-x', '--extensive-mis-size',
        '--meta',
        '--glimmer',
        '--no-snps',
        '--no-check',
        '--no-gc',
        '--no-plots',
        '--plots-format',
        '--memory-efficient',
    )

    if ' ' in qconfig.QUAST_HOME:
        logger.error(
            'QUAST does not support spaces in paths. \n'
            'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
            'Please, put QUAST in a different directory, then try again.\n',
            to_stderr=True,
            exit_with_code=3)

    if not args:
        qconfig.usage(meta=True)
        sys.exit(0)

    # Collected for completeness; nothing later in this function reads them.
    genes = []
    operons = []
    html_report = qconfig.html_report
    make_latest_symlink = True
    ref_txt_fpath = None

    try:
        options, contigs_fpaths = getopt.gnu_getopt(
            args, qconfig.short_options, qconfig.long_options)
    except getopt.GetoptError:
        _, exc_value, _ = sys.exc_info()
        print >> sys.stderr, exc_value
        print >> sys.stderr
        qconfig.usage(meta=True)
        sys.exit(2)

    quast_py_args = args[:]
    test_mode = False
    test_data_dirpath = os.path.join(qconfig.QUAST_HOME, 'test_data')

    # First pass: options that must take effect before anything else.
    # Iterate over a snapshot: the body removes from and appends to
    # `options`, and doing that to the list being iterated would skip the
    # option that follows each removed one (e.g. `-d -h` never showed help).
    for opt, arg in options[:]:
        if opt in ('-d', '--debug'):
            options.remove((opt, arg))
            qconfig.debug = True
            logger.set_up_console_handler(debug=True)

        elif opt in ('--test', '--test-no-ref'):
            options.remove((opt, arg))
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt)
            options += [('-o', 'quast_test_output')]
            if opt == '--test':
                options += [('-R', ','.join([
                    os.path.join(test_data_dirpath, 'meta_ref_1.fasta'),
                    os.path.join(test_data_dirpath, 'meta_ref_2.fasta'),
                    os.path.join(test_data_dirpath, 'meta_ref_3.fasta')]))]
            contigs_fpaths += [
                os.path.join(test_data_dirpath, 'meta_contigs_1.fasta'),
                os.path.join(test_data_dirpath, 'meta_contigs_2.fasta')]
            test_mode = True

        elif opt.startswith('--help') or opt == '-h':
            qconfig.usage(opt == "--help-hidden", meta=True, short=False)
            sys.exit(0)

        elif opt.startswith('--version') or opt == '-v':
            qconfig.print_version(meta=True)
            sys.exit(0)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage(meta=True)
        sys.exit(2)

    ref_fpaths = []
    combined_ref_fpath = ''
    reads_fpath_f = ''
    reads_fpath_r = ''
    output_dirpath = None
    labels = None
    all_labels_from_dirs = False

    # Second pass: the remaining options.
    for opt, arg in options:
        if opt in ('-o', "--output-dir"):
            # Removing output dir arg in order to further
            # construct other quast calls from this options
            if opt in quast_py_args and arg in quast_py_args:
                quast_py_args = __remove_from_quast_py_args(
                    quast_py_args, opt, arg)
            output_dirpath = os.path.abspath(arg)
            make_latest_symlink = False

        elif opt in ('-G', "--genes"):
            assert_file_exists(arg, 'genes')
            # append, not +=: `list += str` would add single characters.
            genes.append(arg)

        elif opt in ('-O', "--operons"):
            assert_file_exists(arg, 'operons')
            operons.append(arg)

        elif opt in ('-R', "--reference"):
            # Removing reference args in order to further
            # construct quast calls from this args with other reference options
            if opt in quast_py_args and arg in quast_py_args:
                quast_py_args = __remove_from_quast_py_args(
                    quast_py_args, opt, arg)
            if os.path.isdir(arg):
                # A directory: take every FASTA file found below it.
                ref_fpaths = sorted(
                    os.path.join(dirpath, fname)
                    for (dirpath, _dirs, fnames) in os.walk(arg)
                    for fname in fnames
                    if qutils.check_is_fasta_file(fname))
            else:
                # Otherwise a comma-separated list of files.
                ref_fpaths = arg.split(',')
                for ref_fpath in ref_fpaths:
                    assert_file_exists(ref_fpath, 'reference')

        elif opt == '--max-ref-number':
            quast_py_args = __remove_from_quast_py_args(
                quast_py_args, opt, arg)
            qconfig.max_references = max(0, int(arg))

        elif opt in ('-m', "--min-contig"):
            qconfig.min_contig = int(arg)

        elif opt in ('-t', "--threads"):
            qconfig.max_threads = max(1, int(arg))

        elif opt in ('-l', '--labels'):
            quast_py_args = __remove_from_quast_py_args(
                quast_py_args, opt, arg)
            labels = quast.parse_labels(arg, contigs_fpaths)

        elif opt == '-L':
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt)
            all_labels_from_dirs = True

        elif opt == '--references-list':
            ref_txt_fpath = arg

        elif opt in ('--no-html', '--fast'):
            # --fast: --no-check, --no-gc, --no-snps will automatically set
            # in QUAST runs
            html_report = False

        elif opt == '--silent':
            qconfig.silent = True

        elif opt in ('-1', '--reads1'):
            reads_fpath_f = arg
            quast_py_args = __remove_from_quast_py_args(
                quast_py_args, opt, arg)

        elif opt in ('-2', '--reads2'):
            reads_fpath_r = arg
            quast_py_args = __remove_from_quast_py_args(
                quast_py_args, opt, arg)

        elif opt == '--contig-alignment-html':
            qconfig.create_contig_alignment_html = True

        elif opt in pass_through_opts:
            pass

        else:
            logger.error('Unknown option: %s. Use -h for help.'
                         % (opt + ' ' + arg),
                         to_stderr=True, exit_with_code=2)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    labels = quast.process_labels(contigs_fpaths, labels, all_labels_from_dirs)

    # The contigs are passed to quast.py as `assemblies`, not as arguments.
    for contigs_fpath in contigs_fpaths:
        if contigs_fpath in quast_py_args:
            quast_py_args.remove(contigs_fpath)

    # Directories
    output_dirpath, _, _ = quast._set_up_output_dir(
        output_dirpath, None, make_latest_symlink, save_json=False)
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    logger.set_up_file_handler(output_dirpath)

    # Effective command line rebuilt from the parsed options, so that it
    # reflects what --test / --debug handling added or removed.  Kept in
    # its own name instead of rebinding the `args` parameter.
    command_line = [os.path.realpath(__file__)]
    for k, v in options:
        command_line.extend([k, v])
    command_line.extend(contigs_fpaths)
    logger.print_command_line(command_line, wrap_after=None)
    logger.start()
    qconfig.set_max_threads(logger)

    ########################################################################

    from libs import reporting
    reload(reporting)

    # Always start from an empty "corrected" directory.
    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    if ref_fpaths:
        logger.main_info()
        logger.main_info('Reference(s):')
        corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
            _correct_references(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    assemblies, _ = _correct_contigs(contigs_fpaths, output_dirpath, labels)
    if not assemblies:
        logger.error(
            "None of the assembly files contains correct contigs. "
            "Please, provide different files or decrease --min-contig threshold.")
        return 4

    # Running QUAST(s)
    quast_py_args += ['--meta']
    # True only when references came from the SILVA search (not from -R and
    # not from --references-list); enables the filtering step further down.
    downloaded_refs = False

    # SEARCHING REFERENCES
    if not ref_fpaths:
        logger.main_info()
        if qconfig.max_references == 0:
            logger.notice(
                "Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled")
        else:
            if ref_txt_fpath:
                logger.main_info(
                    "List of references was provided, starting to download reference genomes from NCBI...")
            else:
                logger.main_info(
                    "No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
                    "and to download them from NCBI...")
            downloaded_dirpath = os.path.join(output_dirpath,
                                              qconfig.downloaded_dirname)
            if not os.path.isdir(downloaded_dirpath):
                os.mkdir(downloaded_dirpath)
            ref_fpaths = search_references_meta.do(
                assemblies, labels, downloaded_dirpath, ref_txt_fpath)
            if ref_fpaths:
                search_references_meta.is_quast_first_run = True
                if not ref_txt_fpath:
                    downloaded_refs = True
                logger.main_info()
                logger.main_info('Downloaded reference(s):')
                corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                    _correct_references(ref_fpaths, corrected_dirpath)
            elif test_mode and ref_fpaths is None:
                logger.error(
                    'Failed to download or setup SILVA 16S rRNA database for working without '
                    'references on metagenome datasets!',
                    to_stderr=True, exit_with_code=4)

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.main_info()
        logger.notice(
            'No references are provided, starting regular QUAST with MetaGeneMark gene finder')
        _start_quast_main(
            None,
            quast_py_args,
            assemblies=assemblies,
            output_dirpath=output_dirpath,
            exit_on_exception=True)
        # sys.exit rather than the bare `exit`, which only exists when the
        # `site` module has been loaded.
        sys.exit(0)

    # Running combined reference
    combined_output_dirpath = os.path.join(output_dirpath,
                                           qconfig.combined_output_name)

    reads_fpaths = [fpath for fpath in (reads_fpath_f, reads_fpath_r) if fpath]
    if reads_fpaths:
        bed_fpath = reads_analyzer.do(
            combined_ref_fpath, contigs_fpaths, reads_fpaths,
            corrected_ref_fpaths,
            os.path.join(combined_output_dirpath, qconfig.variation_dirname),
            external_logger=logger)
        if bed_fpath:
            quast_py_args += ['--bed-file', bed_fpath]

    # Only the combined-reference runs get this flag; it is removed again
    # before the per-reference runs.
    quast_py_args += ['--combined-ref']

    run_name = 'for the combined reference'
    logger.main_info()
    logger.main_info('Starting quast.py ' + run_name + '...')

    # (notices, warnings, non-fatal errors); threaded through every run.
    total_num_notices = 0
    total_num_warnings = 0
    total_num_nf_errors = 0
    total_num_notifications = (total_num_notices,
                               total_num_warnings,
                               total_num_nf_errors)

    # json_texts collects json_saver.json_text after each run; None means
    # no HTML data is gathered at all.
    if qconfig.html_report:
        from libs.html_saver import json_saver
        json_texts = []
    else:
        json_texts = None

    return_code, total_num_notifications, assemblies, labels = _start_quast_main(
        run_name, quast_py_args + ["--ambiguity-usage", 'all'],
        assemblies=assemblies,
        reference_fpath=combined_ref_fpath,
        output_dirpath=combined_output_dirpath,
        num_notifications_tuple=total_num_notifications,
        is_first_run=True)

    # Scaffold flags apply to the combined run only.  Guard the removal:
    # the token may be absent from quast_py_args (e.g. when it was given
    # as part of a combined short option), and list.remove would raise.
    for cl_arg in command_line:
        if cl_arg in ('-s', "--scaffolds") and cl_arg in quast_py_args:
            quast_py_args.remove(cl_arg)

    if json_texts is not None:
        json_texts.append(json_saver.json_text)
    search_references_meta.is_quast_first_run = False

    genome_info_dirpath = os.path.join(output_dirpath,
                                       qconfig.combined_output_name,
                                       'genome_stats')
    genome_info_fpath = os.path.join(genome_info_dirpath, 'genome_info.txt')
    if not os.path.exists(genome_info_fpath):
        # The combined run produced no genome info: finish early.
        logger.main_info('')
        logger.main_info(
            'Failed aligning the contigs for all the references. ' +
            ('Try to restart MetaQUAST with another references.'
             if not downloaded_refs else
             'Try to use option --max-ref-number to change maximum number of references '
             '(per each assembly) to download.'))
        logger.main_info('')
        quast._cleanup(corrected_dirpath)
        logger.main_info('MetaQUAST finished.')
        logger.finish_up(numbers=tuple(total_num_notifications),
                         check_test=test_mode)
        return

    if downloaded_refs:
        logger.main_info()
        logger.main_info(
            'Excluding downloaded references with low genome fraction from further analysis..')
        corr_ref_fpaths = remove_unaligned_downloaded_refs(
            genome_info_fpath, ref_fpaths, chromosomes_by_refs)
        if corr_ref_fpaths and corr_ref_fpaths != ref_fpaths:
            logger.main_info()
            logger.main_info('Filtered reference(s):')
            os.remove(combined_ref_fpath)
            contigs_analyzer.ref_labels_by_chromosomes = {}
            corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                _correct_references(corr_ref_fpaths, corrected_dirpath)

            run_name = 'for the corrected combined reference'
            logger.main_info()
            logger.main_info('Starting quast.py ' + run_name + '...')
            return_code, total_num_notifications, assemblies, labels = _start_quast_main(
                run_name, quast_py_args + ["--ambiguity-usage", 'all'],
                assemblies=assemblies,
                reference_fpath=combined_ref_fpath,
                output_dirpath=combined_output_dirpath,
                num_notifications_tuple=total_num_notifications,
                is_first_run=True)
            if json_texts is not None:
                # Replace the entry of the first combined run.
                json_texts = json_texts[:-1]
                json_texts.append(json_saver.json_text)
        elif corr_ref_fpaths == ref_fpaths:
            logger.main_info(
                'All downloaded references have genome fraction more than 10%. Nothing was excluded.')
        else:
            logger.main_info(
                'All downloaded references have low genome fraction. Nothing was excluded for now.')

    quast_py_args += ['--no-check-meta']
    # From here on qconfig.contig_thresholds is a comma-separated string
    # (or the literal 'None') suitable for the command line.
    qconfig.contig_thresholds = ','.join(
        [str(threshold) for threshold in qconfig.contig_thresholds
         if threshold > qconfig.min_contig])
    if not qconfig.contig_thresholds:
        qconfig.contig_thresholds = 'None'
    quast_py_args = __remove_from_quast_py_args(
        quast_py_args, '--contig-thresholds', qconfig.contig_thresholds)
    quast_py_args += ['--contig-thresholds', qconfig.contig_thresholds]
    quast_py_args.remove('--combined-ref')

    logger.main_info()
    logger.main_info(
        'Partitioning contigs into bins aligned to each reference..')

    assemblies_by_reference, not_aligned_assemblies = _partition_contigs(
        assemblies, corrected_ref_fpaths, corrected_dirpath,
        os.path.join(combined_output_dirpath, 'contigs_reports',
                     'alignments_%s.tsv'),
        labels)

    # Names of the references that actually got a per-reference run.
    ref_names = []
    output_dirpath_per_ref = os.path.join(output_dirpath,
                                          qconfig.per_ref_dirname)
    for ref_fpath, ref_assemblies in assemblies_by_reference:
        ref_name = qutils.name_from_fpath(ref_fpath)
        logger.main_info('')
        if not ref_assemblies:
            logger.main_info('No contigs were aligned to the reference ' +
                             ref_name + ', skipping..')
            continue

        ref_names.append(ref_name)
        run_name = 'for the contigs aligned to ' + ref_name
        logger.main_info('Starting quast.py ' + run_name)

        return_code, total_num_notifications = _start_quast_main(
            run_name, quast_py_args,
            assemblies=ref_assemblies,
            reference_fpath=ref_fpath,
            output_dirpath=os.path.join(output_dirpath_per_ref, ref_name),
            exit_on_exception=False,
            num_notifications_tuple=total_num_notifications)
        if json_texts is not None:
            json_texts.append(json_saver.json_text)

    # Finally running for the contigs that has not been aligned to any reference
    # (skipped when every not-aligned file is missing or empty).
    no_unaligned_contigs = not any(
        os.path.isfile(assembly.fpath) and os.stat(assembly.fpath).st_size != 0
        for assembly in not_aligned_assemblies)

    run_name = 'for the contigs not aligned anywhere'
    logger.main_info()
    if no_unaligned_contigs:
        logger.main_info('Skipping quast.py ' + run_name +
                         ' (everything is aligned!)')
    else:
        logger.main_info('Starting quast.py ' + run_name + '...')

        return_code, total_num_notifications = _start_quast_main(
            run_name, quast_py_args,
            assemblies=not_aligned_assemblies,
            output_dirpath=os.path.join(output_dirpath,
                                        qconfig.not_aligned_name),
            exit_on_exception=False,
            num_notifications_tuple=total_num_notifications)

        if return_code not in [0, 4]:
            logger.error(
                'Error running quast.py for the contigs not aligned anywhere')
        elif return_code == 4:
            # no unaligned contigs, i.e. everything aligned
            no_unaligned_contigs = True

        if not no_unaligned_contigs and json_texts is not None:
            json_texts.append(json_saver.json_text)

    if ref_names:
        logger.print_timestamp()
        logger.main_info("Summarizing results...")

        summary_output_dirpath = os.path.join(output_dirpath,
                                              qconfig.meta_summary_dir)
        if not os.path.isdir(summary_output_dirpath):
            os.makedirs(summary_output_dirpath)

        make_html = html_report and json_texts
        if make_html:
            from libs.html_saver import html_saver
            html_summary_report_fpath = html_saver.init_meta_report(
                output_dirpath)
        else:
            html_summary_report_fpath = None

        from libs import create_meta_summary
        metrics_for_plots = reporting.Fields.main_metrics
        misassembl_metrics = [reporting.Fields.MIS_RELOCATION,
                              reporting.Fields.MIS_TRANSLOCATION,
                              reporting.Fields.MIS_INVERTION,
                              reporting.Fields.MIS_ISTRANSLOCATIONS]
        if no_unaligned_contigs:
            summary_names = ref_names
        else:
            summary_names = ref_names + [qconfig.not_aligned_name]
        create_meta_summary.do(
            html_summary_report_fpath, summary_output_dirpath,
            combined_output_dirpath, output_dirpath_per_ref,
            metrics_for_plots, misassembl_metrics, summary_names)

        if make_html:
            from libs import plotter
            html_saver.save_colors(output_dirpath, contigs_fpaths,
                                   plotter.dict_color_and_ls, meta=True)
            html_saver.create_meta_report(output_dirpath, json_texts)

    quast._cleanup(corrected_dirpath)
    logger.main_info('')
    logger.main_info('MetaQUAST finished.')
    logger.finish_up(numbers=tuple(total_num_notifications),
                     check_test=test_mode)
def main(args): if ' ' in qconfig.QUAST_HOME: logger.error('QUAST does not support spaces in paths. \n' 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n' 'Please, put QUAST in a different directory, then try again.\n', to_stderr=True, exit_with_code=3) if not args: qconfig.usage(meta=True) sys.exit(0) genes = [] operons = [] html_report = qconfig.html_report make_latest_symlink = True ref_txt_fpath = None try: options, contigs_fpaths = getopt.gnu_getopt(args, qconfig.short_options, qconfig.long_options) except getopt.GetoptError: _, exc_value, _ = sys.exc_info() print >> sys.stderr, exc_value print >> sys.stderr qconfig.usage(meta=True) sys.exit(2) quast_py_args = args[:] test_mode = False for opt, arg in options: if opt in ('-d', '--debug'): options.remove((opt, arg)) qconfig.debug = True logger.set_up_console_handler(debug=True) elif opt == '--test' or opt == '--test-no-ref': options.remove((opt, arg)) quast_py_args = __remove_from_quast_py_args(quast_py_args, opt) options += [('-o', 'quast_test_output')] if opt == '--test': options += [('-R', ','.join([os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_1.fasta'), os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_2.fasta'), os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_3.fasta')]))] contigs_fpaths += [os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_contigs_1.fasta'), os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_contigs_2.fasta')] test_mode = True elif opt.startswith('--help') or opt == '-h': qconfig.usage(opt == "--help-hidden", meta=True, short=False) sys.exit(0) elif opt.startswith('--version') or opt == '-v': qconfig.print_version(meta=True) sys.exit(0) if not contigs_fpaths: logger.error("You should specify at least one file with contigs!\n") qconfig.usage(meta=True) sys.exit(2) ref_fpaths = [] combined_ref_fpath = '' reads_fpath_f = '' reads_fpath_r = '' output_dirpath = None labels = None all_labels_from_dirs = False for opt, arg in options: if opt in ('-o', 
"--output-dir"): # Removing output dir arg in order to further # construct other quast calls from this options if opt in quast_py_args and arg in quast_py_args: quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg) output_dirpath = os.path.abspath(arg) make_latest_symlink = False elif opt in ('-G', "--genes"): assert_file_exists(arg, 'genes') genes += arg elif opt in ('-O', "--operons"): assert_file_exists(arg, 'operons') operons += arg elif opt in ('-R', "--reference"): # Removing reference args in order to further # construct quast calls from this args with other reference options if opt in quast_py_args and arg in quast_py_args: quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg) if os.path.isdir(arg): ref_fpaths = [os.path.join(path,file) for (path, dirs, files) in os.walk(arg) for file in files if qutils.check_is_fasta_file(file)] ref_fpaths.sort() else: ref_fpaths = arg.split(',') for i, ref_fpath in enumerate(ref_fpaths): assert_file_exists(ref_fpath, 'reference') ref_fpaths[i] = ref_fpath elif opt == '--max-ref-number': quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg) qconfig.max_references = int(arg) if qconfig.max_references < 0: qconfig.max_references = 0 elif opt in ('-m', "--min-contig"): qconfig.min_contig = int(arg) elif opt in ('-t', "--threads"): qconfig.max_threads = int(arg) if qconfig.max_threads < 1: qconfig.max_threads = 1 elif opt in ('-l', '--labels'): quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg) labels = quast.parse_labels(arg, contigs_fpaths) elif opt == '-L': quast_py_args = __remove_from_quast_py_args(quast_py_args, opt) all_labels_from_dirs = True elif opt in ('-j', '--save-json'): pass elif opt in ('-J', '--save-json-to'): pass elif opt == "--contig-thresholds": pass elif opt in ('-c', "--mincluster"): pass elif opt == "--est-ref-size": pass elif opt == "--gene-thresholds": pass elif opt in ('-s', "--scaffolds"): pass elif opt == "--gage": pass elif opt == 
"--debug": pass elif opt in ('-e', "--eukaryote"): pass elif opt in ('-f', "--gene-finding"): pass elif opt in ('-i', "--min-alignment"): pass elif opt in ('-c', "--min-cluster"): pass elif opt in ('-a', "--ambiguity-usage"): pass elif opt in ('-u', "--use-all-alignments"): pass elif opt == "--strict-NA": pass elif opt in ('-x', "--extensive-mis-size"): pass elif opt == "--meta": pass elif opt == '--references-list': ref_txt_fpath = arg elif opt == '--glimmer': pass elif opt == '--no-snps': pass elif opt == '--no-check': pass elif opt == '--no-gc': pass elif opt == '--no-plots': pass elif opt == '--no-html': html_report = False elif opt == '--fast': # --no-check, --no-gc, --no-snps will automatically set in QUAST runs html_report = False elif opt == '--plots-format': pass elif opt == '--memory-efficient': pass elif opt == '--silent': qconfig.silent = True elif opt in ('-1', '--reads1'): reads_fpath_f = arg quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg) elif opt in ('-2', '--reads2'): reads_fpath_r = arg quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg) elif opt == '--contig-alignment-html': qconfig.create_contig_alignment_html = True else: logger.error('Unknown option: %s. Use -h for help.' 
% (opt + ' ' + arg), to_stderr=True, exit_with_code=2) for c_fpath in contigs_fpaths: assert_file_exists(c_fpath, 'contigs') labels = quast.process_labels(contigs_fpaths, labels, all_labels_from_dirs) for contigs_fpath in contigs_fpaths: if contigs_fpath in quast_py_args: quast_py_args.remove(contigs_fpath) # Directories output_dirpath, _, _ = quast._set_up_output_dir( output_dirpath, None, make_latest_symlink, save_json=False) corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname) logger.set_up_file_handler(output_dirpath) args = [os.path.realpath(__file__)] for k, v in options: args.extend([k, v]) args.extend(contigs_fpaths) logger.print_command_line(args, wrap_after=None) logger.start() qconfig.set_max_threads(logger) ######################################################################## from libs import reporting reload(reporting) if os.path.isdir(corrected_dirpath): shutil.rmtree(corrected_dirpath) os.mkdir(corrected_dirpath) # PROCESSING REFERENCES if ref_fpaths: logger.main_info() logger.main_info('Reference(s):') corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\ _correct_references(ref_fpaths, corrected_dirpath) # PROCESSING CONTIGS logger.main_info() logger.main_info('Contigs:') assemblies, correct_assemblies = _correct_contigs(contigs_fpaths, output_dirpath, labels) if not assemblies: logger.error("None of the assembly files contains correct contigs. 
" "Please, provide different files or decrease --min-contig threshold.") return 4 # Running QUAST(s) quast_py_args += ['--meta'] downloaded_refs = False # SEARCHING REFERENCES if not ref_fpaths: logger.main_info() if qconfig.max_references == 0: logger.notice("Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled") else: if ref_txt_fpath: logger.main_info("List of references was provided, starting to download reference genomes from NCBI...") else: logger.main_info("No references are provided, starting to search for reference genomes in SILVA 16S rRNA database " "and to download them from NCBI...") downloaded_dirpath = os.path.join(output_dirpath, qconfig.downloaded_dirname) if not os.path.isdir(downloaded_dirpath): os.mkdir(downloaded_dirpath) ref_fpaths = search_references_meta.do(assemblies, labels, downloaded_dirpath, ref_txt_fpath) if ref_fpaths: search_references_meta.is_quast_first_run = True if not ref_txt_fpath: downloaded_refs = True logger.main_info() logger.main_info('Downloaded reference(s):') corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\ _correct_references(ref_fpaths, corrected_dirpath) elif test_mode and ref_fpaths is None: logger.error('Failed to download or setup SILVA 16S rRNA database for working without ' 'references on metagenome datasets!', to_stderr=True, exit_with_code=4) if not ref_fpaths: # No references, running regular quast with MetaGenemark gene finder logger.main_info() logger.notice('No references are provided, starting regular QUAST with MetaGeneMark gene finder') _start_quast_main( None, quast_py_args, assemblies=assemblies, output_dirpath=output_dirpath, exit_on_exception=True) exit(0) # Running combined reference combined_output_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name) reads_fpaths = [] if reads_fpath_f: reads_fpaths.append(reads_fpath_f) if reads_fpath_r: reads_fpaths.append(reads_fpath_r) if reads_fpaths: bed_fpath = 
reads_analyzer.do(combined_ref_fpath, contigs_fpaths, reads_fpaths, corrected_ref_fpaths, os.path.join(combined_output_dirpath, qconfig.variation_dirname), external_logger=logger) if bed_fpath: quast_py_args += ['--bed-file'] quast_py_args += [bed_fpath] quast_py_args += ['--combined-ref'] run_name = 'for the combined reference' logger.main_info() logger.main_info('Starting quast.py ' + run_name + '...') total_num_notices = 0 total_num_warnings = 0 total_num_nf_errors = 0 total_num_notifications = (total_num_notices, total_num_warnings, total_num_nf_errors) if qconfig.html_report: from libs.html_saver import json_saver json_texts = [] else: json_texts = None return_code, total_num_notifications, assemblies, labels = _start_quast_main(run_name, quast_py_args + ["--ambiguity-usage"] + ['all'], assemblies=assemblies, reference_fpath=combined_ref_fpath, output_dirpath=combined_output_dirpath, num_notifications_tuple=total_num_notifications, is_first_run=True) for arg in args: if arg in ('-s', "--scaffolds"): quast_py_args.remove(arg) if json_texts is not None: json_texts.append(json_saver.json_text) search_references_meta.is_quast_first_run = False genome_info_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name, 'genome_stats') genome_info_fpath = os.path.join(genome_info_dirpath, 'genome_info.txt') if not os.path.exists(genome_info_fpath): logger.main_info('') logger.main_info('Failed aligning the contigs for all the references. ' + ('Try to restart MetaQUAST with another references.' 
if not downloaded_refs else 'Try to use option --max-ref-number to change maximum number of references ' '(per each assembly) to download.')) logger.main_info('') quast._cleanup(corrected_dirpath) logger.main_info('MetaQUAST finished.') logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode) return if downloaded_refs: logger.main_info() logger.main_info('Excluding downloaded references with low genome fraction from further analysis..') corr_ref_fpaths = remove_unaligned_downloaded_refs(genome_info_fpath, ref_fpaths, chromosomes_by_refs) if corr_ref_fpaths and corr_ref_fpaths != ref_fpaths: logger.main_info() logger.main_info('Filtered reference(s):') os.remove(combined_ref_fpath) contigs_analyzer.ref_labels_by_chromosomes = {} corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\ _correct_references(corr_ref_fpaths, corrected_dirpath) run_name = 'for the corrected combined reference' logger.main_info() logger.main_info('Starting quast.py ' + run_name + '...') return_code, total_num_notifications, assemblies, labels = _start_quast_main(run_name, quast_py_args + ["--ambiguity-usage"] + ['all'], assemblies=assemblies, reference_fpath=combined_ref_fpath, output_dirpath=combined_output_dirpath, num_notifications_tuple=total_num_notifications, is_first_run=True) if json_texts is not None: json_texts = json_texts[:-1] json_texts.append(json_saver.json_text) elif corr_ref_fpaths == ref_fpaths: logger.main_info('All downloaded references have genome fraction more than 10%. Nothing was excluded.') else: logger.main_info('All downloaded references have low genome fraction. 
Nothing was excluded for now.') quast_py_args += ['--no-check-meta'] qconfig.contig_thresholds = ','.join([str(threshold) for threshold in qconfig.contig_thresholds if threshold > qconfig.min_contig]) if not qconfig.contig_thresholds: qconfig.contig_thresholds = 'None' quast_py_args = __remove_from_quast_py_args(quast_py_args, '--contig-thresholds', qconfig.contig_thresholds) quast_py_args += ['--contig-thresholds'] quast_py_args += [qconfig.contig_thresholds] quast_py_args.remove('--combined-ref') logger.main_info() logger.main_info('Partitioning contigs into bins aligned to each reference..') assemblies_by_reference, not_aligned_assemblies = _partition_contigs( assemblies, corrected_ref_fpaths, corrected_dirpath, os.path.join(combined_output_dirpath, 'contigs_reports', 'alignments_%s.tsv'), labels) ref_names = [] output_dirpath_per_ref = os.path.join(output_dirpath, qconfig.per_ref_dirname) for ref_fpath, ref_assemblies in assemblies_by_reference: ref_name = qutils.name_from_fpath(ref_fpath) logger.main_info('') if not ref_assemblies: logger.main_info('No contigs were aligned to the reference ' + ref_name + ', skipping..') else: ref_names.append(ref_name) run_name = 'for the contigs aligned to ' + ref_name logger.main_info('Starting quast.py ' + run_name) return_code, total_num_notifications = _start_quast_main(run_name, quast_py_args, assemblies=ref_assemblies, reference_fpath=ref_fpath, output_dirpath=os.path.join(output_dirpath_per_ref, ref_name), exit_on_exception=False, num_notifications_tuple=total_num_notifications) if json_texts is not None: json_texts.append(json_saver.json_text) # Finally running for the contigs that has not been aligned to any reference no_unaligned_contigs = True for assembly in not_aligned_assemblies: if os.path.isfile(assembly.fpath) and os.stat(assembly.fpath).st_size != 0: no_unaligned_contigs = False break run_name = 'for the contigs not aligned anywhere' logger.main_info() if no_unaligned_contigs: logger.main_info('Skipping 
quast.py ' + run_name + ' (everything is aligned!)') else: logger.main_info('Starting quast.py ' + run_name + '...') return_code, total_num_notifications = _start_quast_main(run_name, quast_py_args, assemblies=not_aligned_assemblies, output_dirpath=os.path.join(output_dirpath, qconfig.not_aligned_name), exit_on_exception=False, num_notifications_tuple=total_num_notifications) if return_code not in [0, 4]: logger.error('Error running quast.py for the contigs not aligned anywhere') elif return_code == 4: # no unaligned contigs, i.e. everything aligned no_unaligned_contigs = True if not no_unaligned_contigs: if json_texts is not None: json_texts.append(json_saver.json_text) if ref_names: logger.print_timestamp() logger.main_info("Summarizing results...") summary_output_dirpath = os.path.join(output_dirpath, qconfig.meta_summary_dir) if not os.path.isdir(summary_output_dirpath): os.makedirs(summary_output_dirpath) if html_report and json_texts: from libs.html_saver import html_saver html_summary_report_fpath = html_saver.init_meta_report(output_dirpath) else: html_summary_report_fpath = None from libs import create_meta_summary metrics_for_plots = reporting.Fields.main_metrics misassembl_metrics = [reporting.Fields.MIS_RELOCATION, reporting.Fields.MIS_TRANSLOCATION, reporting.Fields.MIS_INVERTION, reporting.Fields.MIS_ISTRANSLOCATIONS] create_meta_summary.do(html_summary_report_fpath, summary_output_dirpath, combined_output_dirpath, output_dirpath_per_ref, metrics_for_plots, misassembl_metrics, ref_names if no_unaligned_contigs else ref_names + [qconfig.not_aligned_name]) if html_report and json_texts: from libs import plotter html_saver.save_colors(output_dirpath, contigs_fpaths, plotter.dict_color_and_ls, meta=True) html_saver.create_meta_report(output_dirpath, json_texts) quast._cleanup(corrected_dirpath) logger.main_info('') logger.main_info('MetaQUAST finished.') logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)