def get_coverage(output_dirpath, ref_fpath, ref_name, bam_fpath, bam_sorted_fpath, log_path, err_path,
                 cov_fpath, physical_cov_fpath, correct_chr_names):
    """Produce the reads-coverage and physical-coverage files for a reference.

    Work is skipped whenever the corresponding output already exists as a
    non-empty file, so repeated runs reuse earlier results.

    :param output_dirpath: directory handed to ``get_physical_coverage``
    :param ref_fpath: path to the reference sequence
    :param ref_name: reference name handed to ``get_physical_coverage``
    :param bam_fpath: path to the input BAM file
    :param bam_sorted_fpath: path of the sorted BAM (created with sambamba if missing)
    :param log_path: file that receives stdout of ``sambamba sort`` (appended)
    :param err_path: file that receives stderr of the external tools (appended)
    :param cov_fpath: path of the final reads-coverage file
    :param physical_cov_fpath: path of the final physical-coverage file
    :param correct_chr_names: chromosome-name mapping forwarded to the helpers
    :return: tuple ``(cov_fpath, physical_cov_fpath)``
    """
    raw_cov_fpath = cov_fpath + '_raw'
    chr_len_fpath = get_chr_len_fpath(ref_fpath, correct_chr_names)

    if not is_non_empty_file(cov_fpath):
        logger.info(' Calculating reads coverage...')
        if not is_non_empty_file(raw_cov_fpath):
            if not is_non_empty_file(bam_sorted_fpath):
                # Context managers guarantee the log handles are closed even if the tool fails.
                with open(log_path, 'a') as log_file:
                    with open(err_path, 'a') as err_file:
                        qutils.call_subprocess(
                            [sambamba_fpath('sambamba'), 'sort', '-t', str(qconfig.max_threads),
                             '-o', bam_sorted_fpath, bam_fpath],
                            stdout=log_file, stderr=err_file, logger=logger)
            # bedtools writes the coverage table to stdout, which is redirected into the raw file.
            with open(raw_cov_fpath, 'w') as raw_cov_file:
                with open(err_path, 'a') as err_file:
                    qutils.call_subprocess(
                        [bedtools_fpath('bedtools'), 'genomecov', '-bga', '-ibam', bam_sorted_fpath,
                         '-g', chr_len_fpath],
                        stdout=raw_cov_file, stderr=err_file, logger=logger)
            qutils.assert_file_exists(raw_cov_fpath, 'coverage file')
        proceed_cov_file(raw_cov_fpath, cov_fpath, correct_chr_names)

    if not is_non_empty_file(physical_cov_fpath):
        raw_cov_fpath = get_physical_coverage(output_dirpath, ref_fpath, ref_name, bam_fpath, log_path,
                                              err_path, physical_cov_fpath, chr_len_fpath)
        proceed_cov_file(raw_cov_fpath, physical_cov_fpath, correct_chr_names)

    return cov_fpath, physical_cov_fpath
def parse_features(option, opt_str, value, parser, logger, is_old_format=False):
    """optparse callback: register a genomic-features file in ``qconfig.features``.

    :param option: optparse Option instance (unused)
    :param opt_str: option string as seen on the command line (unused)
    :param value: option argument; either ``FILEPATH`` or ``FEATURE:FILEPATH``
    :param parser: optparse parser instance (unused)
    :param logger: logger used to emit the deprecation warning
    :param is_old_format: True when invoked for the deprecated ``-G`` option,
        in which case ``value`` is a plain file path stored under ``'gene'``
    """
    if is_old_format:
        fpath = value
        assert_file_exists(fpath, 'genomic feature')
        features = {'gene': fpath}
        logger.warning(
            'Option -G is deprecated! Please use --features (or -g) to specify a file with genomic features.\n'
            'If you want QUAST to extract only a specific genomic feature from the file, \n'
            'you should prepend the filepath with the feature name and a colon, for example:\n'
            '--features CDS:genes.gff --features transcript:transcripts.bed\n'
            'Otherwise, all features would be counted:\n'
            '--features genes.gff\n')
    else:
        if ':' in value:
            # Split only at the first colon so a file path that itself contains
            # colons does not break the two-value unpacking.
            feature, fpath = value.split(':', 1)
        else:
            feature, fpath = qconfig.ALL_FEATURES_TYPE, value  # special case -- read all features
        assert_file_exists(fpath, 'genomic feature')
        features = {feature: fpath}
    ensure_value(qconfig, 'features', dict()).update(features)
def parse_meta_references(option, opt_str, value, parser, logger):
    """optparse callback: expand a comma-separated list of references.

    Each item of ``value`` is either a directory, which is walked recursively
    and contributes every file accepted by ``qutils.check_is_fasta_file``
    (sorted per directory item), or a single file, which must exist. The
    collected paths are appended to the list stored under ``option.dest``
    on ``qconfig``.
    """
    collected = []
    for entry in value.split(','):
        if not os.path.isdir(entry):
            assert_file_exists(entry, 'reference')
            collected.append(entry)
            continue

        found_in_dir = []
        for dirpath, _subdirs, fnames in os.walk(entry):
            for fname in fnames:
                # the check receives the bare file name, not the joined path
                if qutils.check_is_fasta_file(fname, logger=logger):
                    found_in_dir.append(join(dirpath, fname))
        found_in_dir.sort()
        collected.extend(found_in_dir)

    ensure_value(qconfig, option.dest, []).extend(collected)
def parse_files_list(option, opt_str, value, parser, extension, logger):
    """optparse callback: validate a comma-separated list of files by extension.

    Every item of ``value`` must end with ``extension`` and exist on disk;
    accepted paths are appended to the list stored under ``option.dest`` on
    ``qconfig``. An item with a different extension is reported through
    ``logger.error`` with ``exit_with_code=2``.
    """
    kind = extension.upper()
    accepted = []
    for candidate in value.split(','):
        if not candidate.endswith(extension):
            logger.error("incorrect extension for " + kind + " file (" + str(candidate) + ")! ",
                         to_stderr=True, exit_with_code=2)
            continue
        assert_file_exists(candidate, kind + ' file')
        accepted.append(candidate)
    ensure_value(qconfig, option.dest, []).extend(accepted)
def parse_features(option, opt_str, value, parser, logger, is_old_format=False):
    """optparse callback: register a genomic-features file in ``qconfig.features``.

    :param option: optparse Option instance (unused)
    :param opt_str: option string as seen on the command line (unused)
    :param value: option argument; ``FEATURE:FILEPATH`` for ``--features``,
        or a plain file path when ``is_old_format`` is True
    :param parser: optparse parser instance (unused)
    :param logger: logger used to emit the deprecation warning
    :param is_old_format: True when invoked for the deprecated ``-G`` option;
        the file is then stored under the ``'gene'`` feature
    :raises ValueError: if a ``--features`` value has no ``FEATURE:`` prefix
    """
    if is_old_format:
        fpath = value
        assert_file_exists(fpath, 'genomic feature')
        features = {'gene': fpath}
        logger.warning('Option -G is deprecated! Please use --features to specify a file with genomic features.\n'
                       'If you want QUAST to extract only specific genomic feature from a file, you also can specify it:\n'
                       '--features CDS:genes.gff --features transcript:transcripts.bed')
    else:
        if ':' not in value:
            # Same exception type the bare unpacking used to raise, but with an actionable message.
            raise ValueError('--features expects FEATURE:FILEPATH (for example CDS:genes.gff), got: ' + str(value))
        # Split only at the first colon so colons inside the file path are preserved.
        feature, fpath = value.split(':', 1)
        assert_file_exists(fpath, 'genomic feature')
        features = {feature: fpath}
    ensure_value(qconfig, 'features', dict()).update(features)
def get_coverage(output_dirpath, ref_fpath, ref_name, bam_fpath, bam_sorted_fpath, log_path, err_path,
                 cov_fpath, physical_cov_fpath, correct_chr_names):
    """Produce the reads-coverage and physical-coverage files for a reference.

    Each stage is skipped when its output already exists as a non-empty file.

    :param output_dirpath: directory handed to ``get_physical_coverage``
    :param ref_fpath: path to the reference sequence
    :param ref_name: reference name handed to ``get_physical_coverage``
    :param bam_fpath: path to the input BAM file
    :param bam_sorted_fpath: path of the sorted BAM (created with sambamba if missing)
    :param log_path: file that receives stdout of ``sambamba sort`` (appended)
    :param err_path: file that receives stderr of the external tools (appended)
    :param cov_fpath: path of the final reads-coverage file
    :param physical_cov_fpath: path of the final physical-coverage file
    :param correct_chr_names: chromosome-name mapping forwarded to the helpers
    :return: tuple ``(cov_fpath, physical_cov_fpath)``
    """
    raw_cov_fpath = cov_fpath + '_raw'
    chr_len_fpath = get_chr_len_fpath(ref_fpath, correct_chr_names)

    if not is_non_empty_file(cov_fpath):
        logger.info(' Calculating reads coverage...')
        if not is_non_empty_file(raw_cov_fpath):
            if not is_non_empty_file(bam_sorted_fpath):
                sort_cmd = [sambamba_fpath('sambamba'), 'sort', '-t', str(qconfig.max_threads),
                            '-o', bam_sorted_fpath, bam_fpath]
                # Close the redirect targets deterministically instead of leaking the handles.
                with open(log_path, 'a') as log_file:
                    with open(err_path, 'a') as err_file:
                        qutils.call_subprocess(sort_cmd, stdout=log_file, stderr=err_file, logger=logger)
            genomecov_cmd = [bedtools_fpath('bedtools'), 'genomecov', '-bga', '-ibam', bam_sorted_fpath,
                             '-g', chr_len_fpath]
            # bedtools prints the coverage table to stdout; capture it as the raw coverage file.
            with open(raw_cov_fpath, 'w') as raw_cov_file:
                with open(err_path, 'a') as err_file:
                    qutils.call_subprocess(genomecov_cmd, stdout=raw_cov_file, stderr=err_file, logger=logger)
            qutils.assert_file_exists(raw_cov_fpath, 'coverage file')
        proceed_cov_file(raw_cov_fpath, cov_fpath, correct_chr_names)

    if not is_non_empty_file(physical_cov_fpath):
        raw_cov_fpath = get_physical_coverage(output_dirpath, ref_fpath, ref_name, bam_fpath, log_path,
                                              err_path, physical_cov_fpath, chr_len_fpath)
        proceed_cov_file(raw_cov_fpath, physical_cov_fpath, correct_chr_names)

    return cov_fpath, physical_cov_fpath
def get_coverage(output_dirpath, ref_fpath, ref_name, bam_fpath, bam_sorted_fpath, log_path, err_fpath,
                 correct_chr_names, cov_fpath, physical_cov_fpath=None, uncovered_fpath=None,
                 create_cov_files=True):
    """Compute reads coverage (and optionally physical coverage) for a reference.

    The raw coverage table is written to ``cov_fpath + '_raw'``; existing
    non-empty outputs are reused rather than recomputed. When
    ``uncovered_fpath`` is given, uncovered regions are written from the raw
    table. The processed coverage files are only produced when
    ``create_cov_files`` is true.

    :return: tuple ``(cov_fpath, physical_cov_fpath)``
    """
    unprocessed_fpath = cov_fpath + '_raw'
    chrom_sizes_fpath = get_chr_len_fpath(ref_fpath, correct_chr_names)

    reads_cov_missing = not is_non_empty_file(cov_fpath)
    if reads_cov_missing:
        logger.info(' Calculating reads coverage...')

        raw_missing = not is_non_empty_file(unprocessed_fpath)
        if raw_missing:
            sorted_bam_missing = not is_non_empty_file(bam_sorted_fpath)
            if sorted_bam_missing:
                sort_bam(bam_fpath, bam_sorted_fpath, log_path, err_fpath, logger)
            calculate_genome_cov(bam_sorted_fpath, unprocessed_fpath, chrom_sizes_fpath, err_fpath, logger)
            qutils.assert_file_exists(unprocessed_fpath, 'coverage file')

        if uncovered_fpath:
            print_uncovered_regions(unprocessed_fpath, uncovered_fpath, correct_chr_names)
        if create_cov_files:
            proceed_cov_file(unprocessed_fpath, cov_fpath, correct_chr_names)

    # The file check is evaluated before the flag, exactly as in the combined condition.
    physical_cov_missing = not is_non_empty_file(physical_cov_fpath)
    if physical_cov_missing and create_cov_files:
        unprocessed_fpath = get_physical_coverage(output_dirpath, ref_name, bam_fpath, log_path, err_fpath,
                                                  physical_cov_fpath, chrom_sizes_fpath)
        proceed_cov_file(unprocessed_fpath, physical_cov_fpath, correct_chr_names)

    return cov_fpath, physical_cov_fpath
def check_file(option, opt, value):
    """Validate a comma-separated list of file paths and make them absolute.

    Every path is first checked with ``assert_file_exists`` (labelled with
    ``option.dest``); the result is the same list, comma-joined, with each
    path converted by ``abspath``.
    """
    paths = value.split(',')
    # Validate everything first, then convert, so no path is converted before all are checked.
    for path in paths:
        assert_file_exists(path, option.dest)
    absolute_paths = [abspath(path) for path in paths]
    return ','.join(absolute_paths)
def parse_options(logger, quast_args):
    """Parse the QUAST command line, finalize ``qconfig`` and set up the output dir and logging.

    :param logger: logger used for messages; its file and console handlers are set up here
    :param quast_args: full argument list; element 0 is passed to ``get_mode`` to
        select the run mode, the remaining elements are parsed as options
    :return: tuple ``(quast_py_args, contigs_fpaths)``: the arguments after element 0
        (passed through ``clean_metaquast_args`` in meta mode) and the positional
        contigs file paths (extended with test contigs in test modes)

    Exits the process via ``sys.exit`` for ``-h``/``--help``/``--help-hidden``,
    ``-v``/``--version``, missing test data, or when no contigs are given.

    NOTE(review): the code reads option values from ``qconfig`` right after
    ``parser.parse_args`` -- presumably ``QuastOption`` stores them there; confirm.
    """
    # The run mode is derived from the first argument.
    mode = get_mode(quast_args[0])
    is_metaquast = True if mode == 'meta' else False
    qconfig.large_genome = True if mode == 'large' else False

    # Help and version requests short-circuit everything else.
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, mode=mode, short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(mode)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    # Each entry is (option strings, keyword arguments for parser.add_option).
    options = [
        (['--debug'], dict( dest='debug', action='store_true') ),
        (['--no-portable-html'], dict( dest='portable_html', action='store_false') ),
        (['--test'], dict( dest='test', action='store_true') ),
        (['--test-sv'], dict( dest='test_sv', action='store_true') ),
        (['--test-no-ref'], dict( dest='test_no_ref', action='store_true') ),
        (['-o', '--output-dir'], dict( dest='output_dirpath', type='string', action='callback',
            callback=check_output_dir, callback_args=(logger,)) ),
        (['-t', '--threads'], dict( dest='max_threads', type='int', action='callback',
            callback=check_arg_value, callback_args=(logger,),
            callback_kwargs={'default_value': 1, 'min_value': 1}) ),
        # In meta mode the reference option goes through a callback (comma-separated list,
        # directories allowed); otherwise it is stored as a single 'file' value.
        (['-r', '-R', '--reference'], dict( dest='reference',
            type='string' if is_metaquast else 'file',
            action='callback' if is_metaquast else 'store',
            callback_args=(logger,) if is_metaquast else None,
            callback=parse_meta_references if is_metaquast else None) ),
        (['-O', '--operons'], dict( dest='operons', type='file', action='extend') ),
        # -G passes True as the second callback argument (is_old_format of parse_features).
        (['-G', '--genes'], dict( dest='genes', type='string', action='callback',
            callback_args=(logger, True), callback=parse_features) ),
        (['-g', '--features'], dict( dest='features', type='string', action='callback',
            callback_args=(logger,), callback=parse_features) ),
        (['-1', '--reads1'], dict( dest='forward_reads', type='file', action='extend') ),
        (['-2', '--reads2'], dict( dest='reverse_reads', type='file', action='extend') ),
        (['--pe1'], dict( dest='forward_reads', type='file', action='extend') ),
        (['--pe2'], dict(
            dest='reverse_reads', type='file', action='extend') ),
        (['--mp1'], dict( dest='mp_forward_reads', type='file', action='extend') ),
        (['--mp2'], dict( dest='mp_reverse_reads', type='file', action='extend') ),
        (['--12'], dict( dest='interlaced_reads', type='file', action='extend') ),
        (['--pe12'], dict( dest='interlaced_reads', type='file', action='extend') ),
        (['--mp12'], dict( dest='mp_interlaced_reads', type='file', action='extend') ),
        (['--single'], dict( dest='unpaired_reads', type='file', action='extend') ),
        (['--pacbio'], dict( dest='pacbio_reads', type='file', action='extend') ),
        (['--nanopore'], dict( dest='nanopore_reads', type='file', action='extend') ),
        (['--ref-sam'], dict( dest='reference_sam', type='file') ),
        (['--ref-bam'], dict( dest='reference_bam', type='file') ),
        (['--sam'], dict( dest='sam_fpaths', type='string', action='callback',
            callback_args=('.sam', logger), callback=parse_files_list) ),
        (['--bam'], dict( dest='bam_fpaths', type='string', action='callback',
            callback_args=('.bam', logger), callback=parse_files_list) ),
        (['--sv-bedpe'], dict( dest='bed', type='file') ),
        (['--cov'], dict( dest='cov_fpath', type='file') ),
        (['--phys-cov'], dict( dest='phys_cov_fpath', type='file') ),
        (['-l', '--labels'], dict( dest='labels', type='string') ),
        (['-L'], dict( dest='all_labels_from_dirs', action='store_true') ),
        (['--mgm'], dict( dest='metagenemark', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['gene_finding', 'metagenemark']}, default=False) ),
        (['-s', '--split-scaffolds'], dict( dest='split_scaffolds', action='store_true') ),
        (['-e', '--eukaryote'], dict( dest='prokaryote', action='store_false') ),
        (['--fungus'], dict( dest='is_fungus', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['is_fungus'], 'store_false_values': ['prokaryote']}) ),
        (['--large'], dict( dest='large_genome', action='store_true') ),
        (['-f', '--gene-finding'], dict( dest='gene_finding',
            action='store_true') ),
        (['--rna-finding'], dict( dest='rna_gene_finding', action='store_true') ),
        (['--fragmented'], dict( dest='check_for_fragmented_ref', action='store_true') ),
        (['--fragmented-max-indent'], dict( dest='fragmented_max_indent', type='int',
            default=qconfig.MAX_INDEL_LENGTH, action='callback', callback=set_fragmented_max_indent,
            callback_args=(logger,)) ),
        (['-a', '--ambiguity-usage'], dict( dest='ambiguity_usage', type='string',
            default=qconfig.ambiguity_usage, action='callback', callback=check_str_arg_value,
            callback_args=(logger,), callback_kwargs={'available_values': ['none', 'one', 'all']}) ),
        (['--ambiguity-score'], dict( dest='ambiguity_score', type='float', action='callback',
            callback=check_arg_value, callback_args=(logger,),
            callback_kwargs={'min_value': 0.8, 'max_value': 1.0}) ),
        (['-u', '--use-all-alignments'], dict( dest='use_all_alignments', action='store_true') ),
        (['--strict-NA'], dict( dest='strict_NA', action='store_true') ),
        (['--unaligned-part-size'], dict( dest='unaligned_part_size', type=int) ),
        (['--skip-unaligned-mis-contigs'], dict( dest='unaligned_mis_threshold', action="store_const",
            const=0.0) ),
        (['-x', '--extensive-mis-size'], dict( dest='extensive_misassembly_threshold', type='int',
            default=qconfig.extensive_misassembly_threshold, action='callback',
            callback=set_extensive_mis_size, callback_args=(logger,)) ),
        (['--scaffold-gap-max-size'], dict( dest='scaffolds_gap_threshold', type=int) ),
        (['-m', '--min-contig'], dict( dest='min_contig', type='int') ),
        (['-i', '--min-alignment'], dict( dest='min_alignment', type='int') ),
        (['--min-identity'], dict( dest='min_IDY', type='float', default=qconfig.min_IDY,
            action='callback', callback=check_arg_value, callback_args=(logger,),
            callback_kwargs={'min_value': 80.0, 'max_value': 100.0}) ),
        (['--est-ref-size'], dict( dest='estimated_reference_size', type='int') ),
        (['--contig-thresholds'], dict( dest='contig_thresholds') ),
        (['--gene-thresholds'], dict( dest='genes_lengths') ),
        (['--glimmer'], dict( dest='glimmer', action='store_true', default=False) ),
        (['-b', '--conserved-genes-finding'], dict( dest='run_busco', action='store_true', default=False) ),
        (['-k', '--k-mer-stats'], dict( dest='use_kmc', action='store_true', default=False) ),
        (['--k-mer-size'], dict( dest='unique_kmer_len', type='int') ),
        (['--upper-bound-assembly'], dict( dest='optimal_assembly', action='store_true') ),
        (['--upper-bound-min-con'], dict( dest='upperbound_min_connections', type='int',
            action='callback', callback=check_arg_value, callback_args=(logger,),
            callback_kwargs={'min_value': 1}) ),
        (['--est-insert-size'], dict( dest='optimal_assembly_insert_size', type='int',
            action='callback', callback=check_arg_value, callback_args=(logger,),
            callback_kwargs={'min_value': qconfig.optimal_assembly_min_IS,
            'max_value': qconfig.optimal_assembly_max_IS}) ),
        (['--plots-format'], dict( dest='plot_extension', type='string', action='callback',
            callback=check_str_arg_value, callback_args=(logger,),
            callback_kwargs={'available_values': qconfig.supported_plot_extensions}) ),
        (['--use-input-ref-order'], dict( dest='use_input_ref_order', action='store_true') ),
        (['--circos'], dict( dest='draw_circos', action='store_true') ),
        (['--no-read-stats'], dict( dest='no_read_stats', action='store_true') ),
        (['--fast'], dict( dest='fast', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip', 'no_read_stats'],
            'store_false_values': ['show_snps', 'draw_plots', 'html_report', 'create_icarus_html',
            'analyze_gaps']}, default=False) ),
        (['--no-gzip'], dict( dest='no_gzip', action='store_true') ),
        (['--no-check'], dict( dest='no_check', action='store_true') ),
        (['--no-check-meta'], dict( dest='no_check_meta', action='callback',
            callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']}) ),
        (['--no-snps'], dict( dest='show_snps', action='store_false') ),
        (['--no-plots'], dict(
            dest='draw_plots', action='store_false') ),
        (['--no-html'], dict( dest='html_report', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']}) ),
        (['--no-icarus'], dict( dest='create_icarus_html', action='store_false') ),
        (['--no-gc'], dict( dest='no_gc', action='store_true') ),
        (['--no-sv'], dict( dest='no_sv', action='store_true') ),
        (['--memory-efficient'], dict( dest='memory_efficient', action='store_true') ),
        (['--space-efficient'], dict( dest='space_efficient', action='callback',
            callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['space_efficient'],
            'store_false_values': ['show_snps', 'create_icarus_html']},) ),
        (['--silent'], dict( dest='silent', action='store_true') ),
        (['--combined-ref'], dict( dest='is_combined_ref', action='store_true') ),
        (['--colors'], dict( dest='used_colors', action='extend') ),
        (['--ls'], dict( dest='used_ls', action='extend') ),
        (['-j', '--save-json'], dict( dest='save_json', action='store_true') ),
        (['-J', '--save-json-to'], dict( dest='json_output_dirpath') ),
        (['--err-fpath'], dict( dest='error_log_fpath') ),
        (['--read-support'], dict( dest='calculate_read_support', action='store_true') )
    ]
    # Options that are only registered in meta mode.
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict( dest='unique_mapping', action='store_true') ),
            (['--max-ref-number'], dict( dest='max_references', type='int', action='callback',
                callback=check_arg_value, callback_args=(logger,),
                callback_kwargs={'default_value': qconfig.max_references, 'min_value': 0}) ),
            (['--references-list'], dict( dest='references_txt') ),
            (['--blast-db'], dict( dest='custom_blast_db_fpath') )
        ]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    # Positional leftovers are the contigs files.
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    # Reject test switches that do not match the current mode.
    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg)

    if qconfig.glimmer and qconfig.gene_finding:
        logger.error("You cannot use --glimmer and " + ("--mgm" if qconfig.metagenemark else "--gene-finding") + \
                     " simultaneously!", exit_with_code=3)

    # Test modes: force the test output dir and substitute the bundled test inputs.
    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(qconfig.output_dirpath, 'You are trying to run QUAST from ' +
                      str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
                      'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.features = test_features
                qconfig.operons = test_operons
                qconfig.glimmer = True
                if not qconfig.large_genome: # special case -- large mode imposes eukaryote gene finding (GeneMark-ES) and our test data is too small for it.
                    qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

        # Test data may be absent; explain and stop.
        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n\n'
                'If you still want to run tests, please download and unpack test data to CWD:\n'
                ' wget quast.sf.net/test_data.tar.gz && tar xzf test_data.tar.gz\n')
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n", to_stderr=True)
        qconfig.usage(stream=sys.stderr)
        sys.exit(2)

    if qconfig.large_genome:
        set_large_genome_parameters()

    # Thresholds still set to None get a default that depends on large-genome mode.
    if qconfig.extensive_misassembly_threshold is None:
        qconfig.extensive_misassembly_threshold = \
            qconfig.LARGE_EXTENSIVE_MIS_THRESHOLD if qconfig.large_genome else qconfig.DEFAULT_EXT_MIS_SIZE
    if qconfig.min_contig is None:
        qconfig.min_contig = qconfig.LARGE_MIN_CONTIG if qconfig.large_genome else qconfig.DEFAULT_MIN_CONTIG
    if qconfig.min_alignment is None:
        qconfig.min_alignment = qconfig.LARGE_MIN_ALIGNMENT if qconfig.large_genome else qconfig.DEFAULT_MIN_ALIGNMENT

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    # Giving a JSON output directory implies saving JSON.
    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    if not qconfig.output_dirpath:
        check_dirpath(os.getcwd(), 'An output path was not specified manually. You are trying to run QUAST from ' +
                      str(os.getcwd()) + '.\n' +
                      'Please, specify a different directory using -o option.')
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_quast_dir = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    # Logging is attached only now, after the output directory has been set up.
    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_quast_dir:
        logger.notice("Output directory already exists and looks like a QUAST output dir. "
                      "Existing results can be reused (e.g. previously generated alignments)!")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    # Threshold lists are comma-separated strings; the literal "None" means an empty list.
    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [int(x) for x in qconfig.contig_thresholds.split(",")]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [int(x) for x in qconfig.genes_lengths.split(",")]

    qconfig.set_max_threads(logger)

    # A truthy --ambiguity-score value on the parser forces ambiguity usage to 'all'.
    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    if qconfig.sam_fpaths or qconfig.bam_fpaths:
        check_sam_bam_files(contigs_fpaths, qconfig.sam_fpaths, qconfig.bam_fpaths, logger)

    return quast_py_args, contigs_fpaths
def parse_options(logger, quast_args, is_metaquast=False):
    """Parse the QUAST command line, finalize ``qconfig`` and set up the output dir and logging.

    :param logger: logger used for messages; its file and console handlers are set up here
    :param quast_args: full argument list; everything after element 0 is parsed as options
    :param is_metaquast: True when called for MetaQUAST; enables the meta-only options
        and the callback-based handling of the reference option
    :return: tuple ``(quast_py_args, contigs_fpaths)``: the arguments after element 0
        (passed through ``clean_metaquast_args`` when ``is_metaquast``) and the
        positional contigs file paths (extended with test contigs in test modes)

    Exits the process via ``sys.exit`` for ``-h``/``--help``/``--help-hidden``,
    ``-v``/``--version``, missing test data, or when no contigs are given.

    NOTE(review): the code reads option values from ``qconfig`` right after
    ``parser.parse_args`` -- presumably ``QuastOption`` stores them there; confirm.
    """
    # Help and version requests short-circuit everything else.
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, meta=is_metaquast, short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    # Each entry is (option strings, keyword arguments for parser.add_option).
    options = [
        (['--debug'], dict( dest='debug', action='store_true') ),
        (['--no-portable-html'], dict( dest='portable_html', action='store_false') ),
        (['--test'], dict( dest='test', action='store_true') ),
        (['--test-sv'], dict( dest='test_sv', action='store_true') ),
        (['--test-no-ref'], dict( dest='test_no_ref', action='store_true') ),
        (['-o', '--output-dir'], dict( dest='output_dirpath', type='string', action='callback',
            callback=check_output_dir, callback_args=(logger,)) ),
        (['-t', '--threads'], dict( dest='max_threads', type='int', action='callback',
            callback=check_arg_value, callback_args=(logger,),
            callback_kwargs={'default_value': 1, 'min_value': 1}) ),
        # For MetaQUAST the reference option goes through a callback; otherwise it is a single 'file' value.
        (['-R', '--reference'], dict( dest='reference',
            type='string' if is_metaquast else 'file',
            action='callback' if is_metaquast else 'store',
            callback_args=(logger,) if is_metaquast else None,
            callback=parse_meta_references if is_metaquast else None) ),
        (['-G', '--genes'], dict( dest='genes', type='file', action='extend') ),
        (['-O', '--operons'], dict( dest='operons', type='file', action='extend') ),
        (['-1', '--reads1'], dict( dest='forward_reads', type='file', action='extend') ),
        (['-2', '--reads2'], dict( dest='reverse_reads', type='file', action='extend') ),
        (['--pe1'], dict( dest='forward_reads', type='file', action='extend') ),
        (['--pe2'], dict( dest='reverse_reads', type='file', action='extend') ),
        (['--mp1'], dict( dest='mp_forward_reads', type='file', action='extend') ),
        (['--mp2'], dict( dest='mp_reverse_reads', type='file', action='extend') ),
        (['--12'], dict( dest='interlaced_reads', type='file', action='extend') ),
        (['--pe12'], dict(
            dest='interlaced_reads', type='file', action='extend') ),
        (['--mp12'], dict( dest='mp_interlaced_reads', type='file', action='extend') ),
        (['--single'], dict( dest='unpaired_reads', type='file', action='extend') ),
        (['--ref-sam'], dict( dest='reference_sam', type='file') ),
        (['--ref-bam'], dict( dest='reference_bam', type='file') ),
        (['--sam'], dict( dest='sam_fpaths', type='string', action='callback',
            callback_args=('.sam', logger), callback=parse_files_list) ),
        (['--bam'], dict( dest='bam_fpaths', type='string', action='callback',
            callback_args=('.bam', logger), callback=parse_files_list) ),
        (['--sv-bedpe'], dict( dest='bed', type='file') ),
        (['--cov'], dict( dest='cov_fpath', type='file') ),
        (['--phys-cov'], dict( dest='phys_cov_fpath', type='file') ),
        (['-l', '--labels'], dict( dest='labels', type='string') ),
        (['-L'], dict( dest='all_labels_from_dirs', action='store_true') ),
        (['--mgm'], dict( dest='metagenemark', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['gene_finding', 'metagenemark']}, default=False) ),
        (['-s', '--scaffolds'], dict( dest='scaffolds', action='store_true') ),
        (['-e', '--eukaryote'], dict( dest='prokaryote', action='store_false') ),
        (['--fungus'], dict( dest='is_fungus', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['is_fungus'], 'store_false_values': ['prokaryote']}) ),
        (['--large'], dict( dest='large_genome', action='store_true') ),
        (['-f', '--gene-finding'], dict( dest='gene_finding', action='store_true') ),
        (['--rna-finding'], dict( dest='rna_gene_finding', action='store_true') ),
        (['--fragmented'], dict( dest='check_for_fragmented_ref', action='store_true') ),
        (['--fragmented-max-indent'], dict( dest='fragmented_max_indent', type='int',
            default=qconfig.MAX_INDEL_LENGTH, action='callback', callback=set_fragmented_max_indent,
            callback_args=(logger,)) ),
        (['-a', '--ambiguity-usage'], dict( dest='ambiguity_usage', type='string',
            default=qconfig.ambiguity_usage, action='callback', callback=check_str_arg_value,
            callback_args=(logger,), callback_kwargs={'available_values': ['none', 'one', 'all']}) ),
        (['--ambiguity-score'], dict( dest='ambiguity_score', type='float', action='callback',
            callback=check_arg_value, callback_args=(logger,),
            callback_kwargs={'min_value': 0.8, 'max_value': 1.0}) ),
        (['-u', '--use-all-alignments'], dict( dest='use_all_alignments', action='store_true') ),
        (['--strict-NA'], dict( dest='strict_NA', action='store_true') ),
        (['--unaligned-part-size'], dict( dest='unaligned_part_size', type=int) ),
        (['-x', '--extensive-mis-size'], dict( dest='extensive_misassembly_threshold', type='int',
            default=qconfig.extensive_misassembly_threshold, action='callback',
            callback=set_extensive_mis_size, callback_args=(logger,)) ),
        (['--scaffold-gap-max-size'], dict( dest='scaffolds_gap_threshold', type=int) ),
        (['-m', '--min-contig'], dict( dest='min_contig', type='int') ),
        (['-c', '--min-cluster'], dict( dest='min_cluster', type='int') ),
        (['-i', '--min-alignment'], dict( dest='min_alignment', type='int') ),
        (['--min-identity'], dict( dest='min_IDY', type='float', default=qconfig.min_IDY,
            action='callback', callback=check_arg_value, callback_args=(logger,),
            callback_kwargs={'min_value': 80.0, 'max_value': 100.0}) ),
        (['--est-ref-size'], dict( dest='estimated_reference_size', type='int') ),
        (['--contig-thresholds'], dict( dest='contig_thresholds') ),
        (['--gene-thresholds'], dict( dest='genes_lengths') ),
        (['--glimmer'], dict( dest='glimmer', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['gene_finding', 'glimmer']}, default=False) ),
        (['-b', '--find-conserved-genes'], dict( dest='run_busco', action='store_true', default=False) ),
        (['--ideal_assembly'], dict( dest='ideal_assembly', action='store_true') ),
        (['--est-insert-size'], dict( dest='ideal_assembly_insert_size', type='int',
            action='callback', callback=check_arg_value,
            callback_args=(logger,),
            callback_kwargs={'min_value': qconfig.ideal_assembly_min_IS,
            'max_value': qconfig.ideal_assembly_max_IS}) ),
        (['--plots-format'], dict( dest='plot_extension', type='string', action='callback',
            callback=check_str_arg_value, callback_args=(logger,),
            callback_kwargs={'available_values': qconfig.supported_plot_extensions}) ),
        (['--use-input-ref-order'], dict( dest='use_input_ref_order', action='store_true') ),
        (['--svg'], dict( dest='draw_svg', action='store_true') ),
        (['--fast'], dict( dest='fast', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
            'store_false_values': ['show_snps', 'draw_plots', 'html_report', 'create_icarus_html']},
            default=False) ),
        (['--no-gzip'], dict( dest='no_gzip', action='store_true') ),
        (['--no-check'], dict( dest='no_check', action='store_true') ),
        (['--no-check-meta'], dict( dest='no_check_meta', action='callback',
            callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']}) ),
        (['--no-snps'], dict( dest='show_snps', action='store_false') ),
        (['--no-plots'], dict( dest='draw_plots', action='store_false') ),
        (['--no-html'], dict( dest='html_report', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']}) ),
        (['--no-icarus'], dict( dest='create_icarus_html', action='store_false') ),
        (['--no-gc'], dict( dest='no_gc', action='store_true') ),
        (['--no-sv'], dict( dest='no_sv', action='store_true') ),
        (['--memory-efficient'], dict( dest='memory_efficient', action='store_true') ),
        (['--space-efficient'], dict( dest='space_efficient', action='callback',
            callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['space_efficient'],
            'store_false_values': ['create_icarus_html']},) ),
        (['--force-nucmer'], dict( dest='force_nucmer', action='store_true') ),
        (['--silent'], dict( dest='silent', action='store_true') ),
        (['--combined-ref'], dict( dest='is_combined_ref', action='store_true') ),
        (['--colors'], dict( dest='used_colors', action='extend') ),
        (['--ls'], dict( dest='used_ls', action='extend') ),
        (['-j', '--save-json'], dict( dest='save_json', action='store_true') ),
        (['-J', '--save-json-to'], dict( dest='json_output_dirpath') ),
        (['--err-fpath'], dict( dest='error_log_fpath') ),
        (['--read-support'], dict( dest='calculate_read_support', action='store_true') )
    ]
    # Options that are only registered for MetaQUAST.
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict( dest='unique_mapping', action='store_true') ),
            (['--max-ref-number'], dict( dest='max_references', type='int', action='callback',
                callback=check_arg_value, callback_args=(logger,),
                callback_kwargs={'default_value': qconfig.max_references, 'min_value': 0}) ),
            (['--references-list'], dict( dest='references_txt') ),
            (['--blast-db'], dict( dest='custom_blast_db_fpath') )
        ]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    # Positional leftovers are the contigs files.
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    # Reject test switches that do not match the current tool.
    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)

    # Test modes: force the test output dir and substitute the bundled test inputs.
    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(qconfig.output_dirpath, 'You are trying to run QUAST from ' +
                      str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
                      'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.glimmer = True
                qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

        # Test data may be absent; explain and stop.
        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n'
                'If you still want to run tests, please download test_data directory from \n'
                'https://github.com/ablab/quast/ to CWD, or install QUAST from source:\n'
                'git clone https://github.com/ablab/quast && cd quast && ./setup.py install\n')
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n", to_stderr=True)
        qconfig.usage(meta=is_metaquast, stream=sys.stderr)
        sys.exit(2)

    if qconfig.large_genome:
        set_large_genome_parameters()

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    # Giving a JSON output directory implies saving JSON.
    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    if not qconfig.output_dirpath:
        check_dirpath(os.getcwd(), 'An output path was not specified manually. You are trying to run QUAST from ' +
                      str(os.getcwd()) + '.\n' +
                      'Please, specify a different directory using -o option.')
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    # Logging is attached only now, after the output directory has been set up.
    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice("Output directory already exists. Existing Nucmer alignments can be used")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    # Threshold lists are comma-separated strings; the literal "None" means an empty list.
    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [int(x) for x in qconfig.contig_thresholds.split(",")]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [int(x) for x in qconfig.genes_lengths.split(",")]

    qconfig.set_max_threads(logger)

    # A truthy --ambiguity-score value on the parser forces ambiguity usage to 'all'.
    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    if qconfig.sam_fpaths or qconfig.bam_fpaths:
        check_sam_bam_files(contigs_fpaths, qconfig.sam_fpaths, qconfig.bam_fpaths, logger)

    return quast_py_args, contigs_fpaths
def align_single_file(fpath, main_output_dir, output_dirpath, log_path, err_fpath, max_threads,
                      sam_fpath=None, bam_fpath=None, index=None, required_files=None,
                      is_reference=False, alignment_only=False, using_reads='all'):
    """Align reads against one sequence file, reusing existing SAM/BAM/stat files when possible.

    The function resolves the SAM/BAM paths, tries to reuse already existing
    alignment files, otherwise runs BWA inside ``output_dirpath``, cleans read
    names, converts the result to BAM and (unless ``alignment_only``) computes
    flag statistics, coverage and the LAP score.

    :param fpath: sequence file the reads are aligned to.
    :param main_output_dir: forwarded to ``align_reads`` and ``merge_sam_files``.
    :param output_dirpath: directory for default SAM/BAM files; also the working
        directory while BWA runs. The ``.stat`` file is placed in its parent directory.
    :param log_path: only mentioned in the error message printed when BWA fails.
    :param err_fpath: file that collects stderr of the external tools (opened in append mode).
    :param max_threads: thread count passed to the external tools.
    :param sam_fpath: optional SAM path; derived from ``bam_fpath`` or ``fpath`` when missing.
    :param bam_fpath: optional BAM path; derived from ``sam_fpath`` when missing.
    :param index: optional index used only to build the log-message prefix.
    :param required_files: optional list of files that must all exist for the early
        "reuse everything" shortcut. ``sam_fpath`` is appended to this list in place
        when ``alignment_only`` is set, or for a reference whose list has a ``*bed`` entry.
    :param is_reference: switches log messages and is forwarded to ``get_correct_names_for_chroms``.
    :param alignment_only: skip statistics, coverage and LAP score calculation.
    :param using_reads: reads subset name; anything other than ``'all'`` is embedded
        into the SAM/BAM file names.
    :return: ``(correct_chr_names, sam_fpath, bam_fpath)`` or ``(None, None, None)``
        when nothing can be aligned or reused.
    """
    filename = qutils.name_from_fpath(fpath)
    if not sam_fpath and bam_fpath:
        sam_fpath = get_safe_fpath(output_dirpath, bam_fpath[:-4] + '.sam')
    else:
        sam_fpath = sam_fpath or join(output_dirpath, filename + '.sam')
    bam_fpath = bam_fpath or get_safe_fpath(output_dirpath, sam_fpath[:-4] + '.bam')
    if using_reads != 'all':
        sam_fpath = join(output_dirpath, filename + '.' + using_reads + '.sam')
        bam_fpath = sam_fpath.replace('.sam', '.bam')
    if alignment_only or (is_reference and required_files and any(f.endswith('bed') for f in required_files)):
        # required_files defaults to None; create a list only in that case so that
        # a list supplied by the caller is still extended in place as before.
        if required_files is None:
            required_files = []
        required_files.append(sam_fpath)

    stats_fpath = get_safe_fpath(dirname(output_dirpath), filename + '.stat')
    index_str = qutils.index_to_str(index) if index is not None else ''

    reads_fpaths = qconfig.reads_fpaths
    correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath,
                                                     reads_fpaths, logger, is_reference)
    can_reuse = correct_chr_names is not None
    if not can_reuse and not reads_fpaths:
        return None, None, None

    # Shortcut: chromosome names are known and every required file is already present.
    if correct_chr_names and (not required_files or all(isfile(f) for f in required_files)):
        if not alignment_only:
            if isfile(stats_fpath):
                logger.info(' ' + index_str + 'Using existing flag statistics file ' + stats_fpath)
            elif isfile(bam_fpath):
                # Close the redirect targets deterministically instead of leaking the handles.
                with open(stats_fpath, 'w') as stats_out:
                    with open(err_fpath, 'a') as err_out:
                        qutils.call_subprocess([sambamba_fpath('sambamba'), 'flagstat', '-t', str(max_threads), bam_fpath],
                                               stdout=stats_out, stderr=err_out)
                analyse_coverage(output_dirpath, fpath, correct_chr_names, bam_fpath, stats_fpath, err_fpath, logger)
            calc_lap_score(reads_fpaths, sam_fpath, index, index_str, output_dirpath, fpath, filename, err_fpath)
        if isfile(stats_fpath) or alignment_only:
            return correct_chr_names, sam_fpath, bam_fpath

    logger.info(' ' + index_str + 'Pre-processing reads...')
    if is_non_empty_file(sam_fpath) and can_reuse:
        logger.info(' ' + index_str + 'Using existing SAM-file: ' + sam_fpath)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath,
                                                         reads_fpaths, logger, is_reference)
    elif is_non_empty_file(bam_fpath) and can_reuse:
        logger.info(' ' + index_str + 'Using existing BAM-file: ' + bam_fpath)
        sambamba_view(bam_fpath, sam_fpath, qconfig.max_threads, err_fpath, logger)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath,
                                                         reads_fpaths, logger, is_reference)

    if (not correct_chr_names or not is_non_empty_file(sam_fpath)) and reads_fpaths:
        if is_reference:
            logger.info(' Running BWA for reference...')
        else:
            logger.info(' ' + index_str + 'Running BWA...')
        # use absolute paths because we will change workdir
        fpath = abspath(fpath)
        sam_fpath = abspath(sam_fpath)

        prev_dir = os.getcwd()
        os.chdir(output_dirpath)
        try:
            bwa_index(fpath, err_fpath, logger)
            sam_fpaths = align_reads(fpath, sam_fpath, using_reads, main_output_dir, err_fpath, max_threads)

            if len(sam_fpaths) > 1:
                merge_sam_files(sam_fpaths, sam_fpath, bam_fpath, main_output_dir, max_threads, err_fpath)
            elif len(sam_fpaths) == 1:
                shutil.move(sam_fpaths[0], sam_fpath)
                sambamba_view(sam_fpath, bam_fpath, max_threads, err_fpath, logger, filter_rule=None)

            logger.info(' ' + index_str + 'Done.')
        finally:
            # Always restore the working directory, even if indexing/alignment raised.
            os.chdir(prev_dir)
        if not is_non_empty_file(sam_fpath):
            logger.error(' Failed running BWA for ' + fpath + '. See ' + log_path + ' for information.')
            return None, None, None
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath,
                                                         reads_fpaths, logger, is_reference)
    elif not correct_chr_names or not is_non_empty_file(sam_fpath):
        return None, None, None

    if is_reference:
        logger.info(' Sorting SAM-file for reference...')
    else:
        logger.info(' ' + index_str + 'Sorting SAM-file...')

    if can_reuse and is_non_empty_file(bam_fpath) and all_read_names_correct(sam_fpath):
        logger.info(' ' + index_str + 'Using existing BAM-file: ' + bam_fpath)
    else:
        correct_sam_fpath = join(output_dirpath, filename + '.correct.sam')  # write in output dir
        sam_fpath = clean_read_names(sam_fpath, correct_sam_fpath)
        sambamba_view(correct_sam_fpath, bam_fpath, max_threads, err_fpath, logger, filter_rule=None)

    qutils.assert_file_exists(bam_fpath, 'bam file')
    if not alignment_only:
        if isfile(stats_fpath):
            logger.info(' ' + index_str + 'Using existing flag statistics file ' + stats_fpath)
        elif isfile(bam_fpath):
            with open(stats_fpath, 'w') as stats_out:
                with open(err_fpath, 'a') as err_out:
                    qutils.call_subprocess([sambamba_fpath('sambamba'), 'flagstat', '-t', str(max_threads), bam_fpath],
                                           stdout=stats_out, stderr=err_out)
            analyse_coverage(output_dirpath, fpath, correct_chr_names, bam_fpath, stats_fpath, err_fpath, logger)
        calc_lap_score(reads_fpaths, sam_fpath, index, index_str, output_dirpath, fpath, filename, err_fpath)
        if is_reference:
            logger.info(' Analysis for reference is finished.')
        else:
            logger.info(' ' + index_str + 'Analysis is finished.')
    return correct_chr_names, sam_fpath, bam_fpath
def check_file(option, opt, value):
    """Check every path of a comma-separated option value.

    Each item obtained by splitting ``value`` on commas is handed to
    ``assert_file_exists`` together with ``option.dest``. ``opt`` is accepted
    but not used. The original ``value`` string is returned unchanged.
    """
    destination = option.dest
    paths = value.split(',')
    index = 0
    while index < len(paths):
        assert_file_exists(paths[index], destination)
        index += 1
    return value
def align_single_file(fpath, main_output_dir, output_dirpath, log_path, err_fpath, max_threads,
                      sam_fpath=None, bam_fpath=None, index=None, required_files=None,
                      is_reference=False, alignment_only=False, using_reads='all'):
    """Align reads against one sequence file, reusing existing SAM/BAM/stat files when possible.

    The function resolves the SAM/BAM paths, tries to reuse already existing
    alignment files, otherwise runs BWA inside ``output_dirpath``, cleans read
    names, converts the result to BAM and (unless ``alignment_only``) computes
    flag statistics and coverage.

    :param fpath: sequence file the reads are aligned to.
    :param main_output_dir: forwarded to ``align_reads``.
    :param output_dirpath: directory for default SAM/BAM files; also the working
        directory while BWA runs. The ``.stat`` file is placed in its parent directory.
    :param log_path: only mentioned in the error message printed when BWA fails.
    :param err_fpath: file that collects stderr of the external tools (opened in append mode).
    :param max_threads: thread count passed to the external tools.
    :param sam_fpath: optional SAM path; derived from ``bam_fpath`` or ``fpath`` when missing.
    :param bam_fpath: optional BAM path; derived from ``sam_fpath`` when missing.
    :param index: optional index used only to build the log-message prefix.
    :param required_files: optional list of files that must all exist for the early
        "reuse everything" shortcut. ``sam_fpath`` is appended to this list in place
        when ``alignment_only`` is set, or for a reference whose list has a ``*bed`` entry.
    :param is_reference: switches log messages and is forwarded to ``get_correct_names_for_chroms``.
    :param alignment_only: skip statistics and coverage calculation.
    :param using_reads: reads subset name; anything other than ``'all'`` is embedded
        into the SAM/BAM file names. It is always embedded into the cleaned SAM file name.
    :return: ``(correct_chr_names, sam_fpath, bam_fpath)`` or ``(None, None, None)``
        when nothing can be aligned or reused.
    """
    filename = qutils.name_from_fpath(fpath)
    if not sam_fpath and bam_fpath:
        sam_fpath = get_safe_fpath(output_dirpath, bam_fpath[:-4] + '.sam')
    else:
        sam_fpath = sam_fpath or join(output_dirpath, filename + '.sam')
    bam_fpath = bam_fpath or get_safe_fpath(output_dirpath, sam_fpath[:-4] + '.bam')
    if using_reads != 'all':
        sam_fpath = join(output_dirpath, filename + '.' + using_reads + '.sam')
        bam_fpath = sam_fpath.replace('.sam', '.bam')
    if alignment_only or (is_reference and required_files and any(f.endswith('bed') for f in required_files)):
        # required_files defaults to None; create a list only in that case so that
        # a list supplied by the caller is still extended in place as before.
        if required_files is None:
            required_files = []
        required_files.append(sam_fpath)

    stats_fpath = get_safe_fpath(dirname(output_dirpath), filename + '.stat')
    index_str = qutils.index_to_str(index) if index is not None else ''

    reads_fpaths = qconfig.reads_fpaths
    correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath,
                                                     reads_fpaths, logger, is_reference)
    can_reuse = correct_chr_names is not None
    if not can_reuse and not reads_fpaths:
        return None, None, None

    # Shortcut: chromosome names are known and every required file is already present.
    if correct_chr_names and (not required_files or all(isfile(f) for f in required_files)):
        if not alignment_only:
            if isfile(stats_fpath):
                logger.info(' ' + index_str + 'Using existing flag statistics file ' + stats_fpath)
            elif isfile(bam_fpath):
                # Close the redirect targets deterministically instead of leaking the handles.
                with open(stats_fpath, 'w') as stats_out:
                    with open(err_fpath, 'a') as err_out:
                        qutils.call_subprocess([sambamba_fpath('sambamba'), 'flagstat', '-t', str(max_threads), bam_fpath],
                                               stdout=stats_out, stderr=err_out)
                analyse_coverage(output_dirpath, fpath, correct_chr_names, bam_fpath, stats_fpath, err_fpath, logger)
        if isfile(stats_fpath) or alignment_only:
            return correct_chr_names, sam_fpath, bam_fpath

    logger.info(' ' + index_str + 'Pre-processing reads...')
    if is_non_empty_file(sam_fpath) and can_reuse:
        logger.info(' ' + index_str + 'Using existing SAM-file: ' + sam_fpath)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath,
                                                         reads_fpaths, logger, is_reference)
    elif is_non_empty_file(bam_fpath) and can_reuse:
        logger.info(' ' + index_str + 'Using existing BAM-file: ' + bam_fpath)
        sambamba_view(bam_fpath, sam_fpath, qconfig.max_threads, err_fpath, logger)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath,
                                                         reads_fpaths, logger, is_reference)

    if (not correct_chr_names or not is_non_empty_file(sam_fpath)) and reads_fpaths:
        if is_reference:
            logger.info(' Running BWA for reference...')
        else:
            logger.info(' ' + index_str + 'Running BWA...')
        # use absolute paths because we will change workdir
        fpath = abspath(fpath)
        sam_fpath = abspath(sam_fpath)

        prev_dir = os.getcwd()
        os.chdir(output_dirpath)
        try:
            bwa_index(fpath, err_fpath, logger)
            sam_fpaths = align_reads(fpath, sam_fpath, using_reads, main_output_dir, err_fpath, max_threads)

            if len(sam_fpaths) > 1:
                merge_sam_files(sam_fpaths, sam_fpath, bam_fpath, max_threads, err_fpath)
            elif len(sam_fpaths) == 1:
                shutil.move(sam_fpaths[0], sam_fpath)
                # A BAM file sitting next to the single SAM file is moved along with it.
                tmp_bam_fpath = sam_fpaths[0].replace('.sam', '.bam')
                if is_non_empty_file(tmp_bam_fpath):
                    shutil.move(tmp_bam_fpath, bam_fpath)

            logger.info(' ' + index_str + 'Done.')
        finally:
            # Always restore the working directory, even if indexing/alignment raised.
            os.chdir(prev_dir)
        if not is_non_empty_file(sam_fpath):
            logger.error(' Failed running BWA for ' + fpath + '. See ' + log_path + ' for information.')
            return None, None, None
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath,
                                                         reads_fpaths, logger, is_reference)
    elif not correct_chr_names or not is_non_empty_file(sam_fpath):
        return None, None, None

    if is_reference:
        logger.info(' Sorting SAM-file for reference...')
    else:
        logger.info(' ' + index_str + 'Sorting SAM-file...')

    if can_reuse and is_non_empty_file(bam_fpath) and all_read_names_correct(sam_fpath):
        logger.info(' ' + index_str + 'Using existing BAM-file: ' + bam_fpath)
    else:
        correct_sam_fpath = join(output_dirpath, filename + '.' + using_reads + '.correct.sam')  # write in output dir
        sam_fpath = clean_read_names(sam_fpath, correct_sam_fpath)
        sambamba_view(correct_sam_fpath, bam_fpath, max_threads, err_fpath, logger, filter_rule=None)

    qutils.assert_file_exists(bam_fpath, 'bam file')
    if not alignment_only:
        if isfile(stats_fpath):
            logger.info(' ' + index_str + 'Using existing flag statistics file ' + stats_fpath)
        elif isfile(bam_fpath):
            with open(stats_fpath, 'w') as stats_out:
                with open(err_fpath, 'a') as err_out:
                    qutils.call_subprocess([sambamba_fpath('sambamba'), 'flagstat', '-t', str(max_threads), bam_fpath],
                                           stdout=stats_out, stderr=err_out)
            analyse_coverage(output_dirpath, fpath, correct_chr_names, bam_fpath, stats_fpath, err_fpath, logger)
        if is_reference:
            logger.info(' Analysis for reference is finished.')
        else:
            logger.info(' ' + index_str + 'Analysis is finished.')
    return correct_chr_names, sam_fpath, bam_fpath
def parse_options(logger, quast_args, is_metaquast=False):
    """Parse the command line, set up the output directory and logging, and normalise qconfig values.

    :param logger: logger used for messages; also passed to option callbacks.
    :param quast_args: full argument list; element 0 is skipped when parsing.
    :param is_metaquast: enables MetaQUAST-only options and test data.
    :return: ``(quast_py_args, contigs_fpaths)`` -- the arguments without element 0
        (passed through ``clean_metaquast_args`` for MetaQUAST) and the positional
        contigs file paths.

    Exits the process with code 0 for help/version requests and with code 2 when
    test data is missing or no contigs file is given.
    """
    # Help and version requests are served before any parsing takes place.
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, meta=is_metaquast, short=False)
        sys.exit(0)
    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    # Table of (option strings, add_option keyword arguments).
    options = [
        (['--debug'], dict(dest='debug', action='store_true')),
        (['--no-portable-html'], dict(dest='portable_html', action='store_false')),
        (['--test'], dict(dest='test', action='store_true')),
        (['--test-sv'], dict(dest='test_sv', action='store_true')),
        (['--test-no-ref'], dict(dest='test_no_ref', action='store_true')),
        (['-o', '--output-dir'], dict(dest='output_dirpath', type='string', action='callback',
                                      callback=check_output_dir, callback_args=(logger, ))),
        (['-t', '--threads'], dict(dest='max_threads', type='int', action='callback',
                                   callback=check_arg_value, callback_args=(logger, ),
                                   callback_kwargs={'default_value': 1, 'min_value': 1})),
        # MetaQUAST parses references through a callback; plain QUAST stores a single file.
        (['-R', '--reference'], dict(dest='reference', type='string' if is_metaquast else 'file',
                                     action='callback' if is_metaquast else 'store',
                                     callback_args=(logger, ) if is_metaquast else None,
                                     callback=parse_meta_references if is_metaquast else None)),
        (['-G', '--genes'], dict(dest='genes', type='file', action='extend')),
        (['-O', '--operons'], dict(dest='operons', type='file', action='extend')),
        (['-1', '--reads1'], dict(dest='forward_reads', type='file')),
        (['-2', '--reads2'], dict(dest='reverse_reads', type='file')),
        (['--sam'], dict(dest='sam', type='file')),
        (['--bam'], dict(dest='bam', type='file')),
        (['--sv-bedpe'], dict(dest='bed', type='file')),
        (['--cov'], dict(dest='cov_fpath', type='file')),
        (['--phys-cov'], dict(dest='phys_cov_fpath', type='file')),
        (['-l', '--labels'], dict(dest='labels', type='string')),
        (['-L'], dict(dest='all_labels_from_dirs', action='store_true')),
        (['--mgm'], dict(dest='metagenemark', action='callback', callback=set_multiple_variables,
                         callback_kwargs={'store_true_values': ['gene_finding', 'metagenemark']},
                         default=False)),
        (['-s', '--scaffolds'], dict(dest='scaffolds', action='store_true')),
        (['-e', '--eukaryote'], dict(dest='prokaryote', action='store_false')),
        (['-f', '--gene-finding'], dict(dest='gene_finding', action='store_true')),
        (['--fragmented'], dict(dest='check_for_fragmented_ref', action='store_true')),
        (['--fragmented-max-indent'], dict(dest='fragmented_max_indent', type='int',
                                           default=qconfig.MAX_INDEL_LENGTH, action='callback',
                                           callback=set_fragmented_max_indent, callback_args=(logger, ))),
        (['-a', '--ambiguity-usage'], dict(dest='ambiguity_usage', type='string',
                                           default=qconfig.ambiguity_usage, action='callback',
                                           callback=check_str_arg_value, callback_args=(logger, ),
                                           callback_kwargs={'available_values': ['none', 'one', 'all']})),
        (['--ambiguity-score'], dict(dest='ambiguity_score', type='float', action='callback',
                                     callback=check_arg_value, callback_args=(logger, ),
                                     callback_kwargs={'min_value': 0.8, 'max_value': 1.0})),
        (['-u', '--use-all-alignments'], dict(dest='use_all_alignments', action='store_true')),
        (['--strict-NA'], dict(dest='strict_NA', action='store_true')),
        (['--unaligned-part-size'], dict(dest='unaligned_part_size', type=int)),
        (['-x', '--extensive-mis-size'], dict(dest='extensive_misassembly_threshold', type='int',
                                              default=qconfig.extensive_misassembly_threshold,
                                              action='callback', callback=set_extensive_mis_size,
                                              callback_args=(logger, ))),
        (['--scaffold-gap-max-size'], dict(dest='scaffolds_gap_threshold', type=int)),
        (['-m', '--min-contig'], dict(dest='min_contig', type='int')),
        (['-c', '--min-cluster'], dict(dest='min_cluster', type='int')),
        (['-i', '--min-alignment'], dict(dest='min_alignment', type='int')),
        (['--min-identity'], dict(dest='min_IDY', type='float', default=qconfig.min_IDY,
                                  action='callback', callback=check_arg_value, callback_args=(logger, ),
                                  callback_kwargs={'min_value': 80.0, 'max_value': 100.0})),
        (['--est-ref-size'], dict(dest='estimated_reference_size', type='int')),
        (['--contig-thresholds'], dict(dest='contig_thresholds')),
        (['--gene-thresholds'], dict(dest='genes_lengths')),
        (['--gage'], dict(dest='with_gage', action='store_true')),
        (['--glimmer'], dict(dest='glimmer', action='callback', callback=set_multiple_variables,
                             callback_kwargs={'store_true_values': ['gene_finding', 'glimmer']},
                             default=False)),
        (['--plots-format'], dict(dest='plot_extension', type='string', action='callback',
                                  callback=check_str_arg_value, callback_args=(logger, ),
                                  callback_kwargs={'available_values': qconfig.supported_plot_extensions})),
        (['--use-input-ref-order'], dict(dest='use_input_ref_order', action='store_true')),
        (['--svg'], dict(dest='draw_svg', action='store_true')),
        (['--fast'], dict(dest='fast', action='callback', callback=set_multiple_variables,
                          callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
                                           'store_false_values': ['show_snps', 'draw_plots',
                                                                  'html_report', 'create_icarus_html']},
                          default=False)),
        (['--no-gzip'], dict(dest='no_gzip', action='store_true')),
        (['--no-check'], dict(dest='no_check', action='store_true')),
        (['--no-check-meta'], dict(dest='no_check_meta', action='callback', callback=set_multiple_variables,
                                   callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})),
        (['--no-snps'], dict(dest='show_snps', action='store_false')),
        (['--no-plots'], dict(dest='draw_plots', action='store_false')),
        (['--no-html'], dict(dest='html_report', action='callback', callback=set_multiple_variables,
                             callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})),
        (['--no-icarus'], dict(dest='create_icarus_html', action='store_false')),
        (['--no-gc'], dict(dest='no_gc', action='store_true')),
        (['--no-sv'], dict(dest='no_sv', action='store_true')),
        (['--memory-efficient'], dict(dest='memory_efficient', action='store_true')),
        (['--space-efficient'], dict(
            dest='space_efficient', action='callback', callback=set_multiple_variables,
            callback_kwargs={'store_true_values': ['space_efficient'],
                             'store_false_values': ['create_icarus_html']},
        )),
        (['--force-nucmer'], dict(dest='force_nucmer', action='store_true')),
        (['--silent'], dict(dest='silent', action='store_true')),
        (['--combined-ref'], dict(dest='is_combined_ref', action='store_true')),
        (['--colors'], dict(dest='used_colors', action='extend')),
        (['--ls'], dict(dest='used_ls', action='extend')),
        (['-j', '--save-json'], dict(dest='save_json', action='store_true')),
        (['-J', '--save-json-to'], dict(dest='json_output_dirpath')),
        (['--err-fpath'], dict(dest='error_log_fpath')),
        (['--read-support'], dict(dest='calculate_read_support', action='store_true'))
    ]
    # Options that exist only in MetaQUAST.
    if is_metaquast:
        options += [(['--unique-mapping'], dict(dest='unique_mapping', action='store_true')),
                    (['--max-ref-number'], dict(dest='max_references', type='int', action='callback',
                                                callback=check_arg_value, callback_args=(logger, ),
                                                callback_kwargs={'default_value': qconfig.max_references,
                                                                 'min_value': 0})),
                    (['--references-list'], dict(dest='references_txt')),
                    (['--blast-db'], dict(dest='custom_blast_db_fpath'))]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    # NOTE(review): `opts` is never read below while qconfig attributes are; presumably
    # QuastOption and the callbacks store parsed values in qconfig -- confirm.
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    # Reject test modes that do not match the tool being run.
    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)

    # Any test mode: use the test output directory and the bundled test data.
    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(
            qconfig.output_dirpath,
            'You are trying to run QUAST from ' + str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
            'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.glimmer = True
                qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

        # Test data may be absent; explain how to obtain it and stop.
        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n\n'
                'If you still want to run tests, please download and unpack test data to CWD:\n'
                ' wget quast.sf.net/test_data.tar.gz && tar xzf test_data.tar.gz\n'
            )
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n", to_stderr=True)
        qconfig.usage(meta=is_metaquast, stream=sys.stderr)
        sys.exit(2)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    # Giving an explicit JSON directory implies saving JSON.
    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    # Without an explicit output directory the current working directory is checked.
    if not qconfig.output_dirpath:
        check_dirpath(
            os.getcwd(),
            'An output path was not specified manually. You are trying to run QUAST from ' +
            str(os.getcwd()) + '.\n' +
            'Please, specify a different directory using -o option.')
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    # Logging starts only after the output directory is known.
    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice(
            "Output directory already exists. Existing Nucmer alignments can be used"
        )
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    # Thresholds arrive as comma-separated strings; the literal "None" means an empty list.
    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [
            int(x) for x in qconfig.contig_thresholds.split(",")
        ]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [
            int(x) for x in qconfig.genes_lengths.split(",")
        ]

    qconfig.set_max_threads(logger)

    # A user-supplied ambiguity score forces ambiguity usage to 'all'.
    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice(
                "--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified"
            )

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    return quast_py_args, contigs_fpaths
def parse_options(logger, quast_args, is_metaquast=False):
    """Parse the command line, set up the output directory and logging, and normalise qconfig values.

    :param logger: logger used for messages; also passed to option callbacks.
    :param quast_args: full argument list; element 0 is skipped when parsing.
    :param is_metaquast: enables MetaQUAST-only options and test data.
    :return: ``(quast_py_args, contigs_fpaths)`` -- the arguments without element 0
        (passed through ``clean_metaquast_args`` for MetaQUAST) and the positional
        contigs file paths.

    Exits the process with code 0 for help/version requests and with code 2 when
    no contigs file is given.
    """
    # Help and version requests are served before any parsing takes place.
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, meta=is_metaquast, short=False)
        sys.exit(0)
    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    # Table of (option strings, add_option keyword arguments).
    options = [
        (['--debug'], dict(dest='debug', action='store_true')),
        (['--test'], dict(dest='test', action='store_true')),
        (['--test-sv'], dict(dest='test_sv', action='store_true')),
        (['--test-no-ref'], dict(dest='test_no_ref', action='store_true')),
        (['-o', '--output-dir'], dict(dest='output_dirpath', type='string', action='callback',
                                      callback=check_output_dir, callback_args=(logger,))),
        (['-t', '--threads'], dict(dest='max_threads', type='int', action='callback',
                                   callback=check_arg_value, callback_args=(logger,),
                                   callback_kwargs={'default_value': 1, 'min_value': 1})),
        # MetaQUAST parses references through a callback; plain QUAST stores a single file.
        (['-R', '--reference'], dict(dest='reference', type='string' if is_metaquast else 'file',
                                     action='callback' if is_metaquast else 'store',
                                     callback_args=(logger,) if is_metaquast else None,
                                     callback=parse_meta_references if is_metaquast else None)),
        (['-G', '--genes'], dict(dest='genes', type='file', action='extend')),
        (['-O', '--operons'], dict(dest='operons', type='file', action='extend')),
        (['-1', '--reads1'], dict(dest='forward_reads', type='file')),
        (['-2', '--reads2'], dict(dest='reverse_reads', type='file')),
        (['--sam'], dict(dest='sam', type='file')),
        (['--bam'], dict(dest='bam', type='file')),
        (['--sv-bedpe'], dict(dest='bed', type='file')),
        (['-l', '--labels'], dict(dest='labels', type='string')),
        (['-L'], dict(dest='all_labels_from_dirs', action='store_true')),
        (['--meta'], dict(dest='meta', action='store_true')),
        (['-s', '--scaffolds'], dict(dest='scaffolds', action='store_true')),
        (['-e', '--eukaryote'], dict(dest='prokaryote', action='store_false')),
        (['-f', '--gene-finding'], dict(dest='gene_finding', action='store_true')),
        (['--fragmented'], dict(dest='check_for_fragmented_ref', action='store_true')),
        (['-a', '--ambiguity-usage'], dict(dest='ambiguity_usage', type='string',
                                           default=qconfig.ambiguity_usage, action='callback',
                                           callback=check_str_arg_value, callback_args=(logger,),
                                           callback_kwargs={'available_values': ['none', 'one', 'all']})),
        (['--ambiguity-score'], dict(dest='ambiguity_score', type='float', action='callback',
                                     callback=check_arg_value, callback_args=(logger,),
                                     callback_kwargs={'min_value': 0.8, 'max_value': 1.0})),
        (['-u', '--use-all-alignments'], dict(dest='use_all_alignments', action='store_true')),
        (['--strict-NA'], dict(dest='strict_NA', action='store_true')),
        (['--significant-part-size'], dict(dest='significant_part_size', type=int)),
        (['-x', '--extensive-mis-size'], dict(dest='extensive_misassembly_threshold', type='int',
                                              default=qconfig.extensive_misassembly_threshold,
                                              action='callback', callback=set_extensive_mis_size,
                                              callback_args=(logger,))),
        (['-m', '--min-contig'], dict(dest='min_contig', type='int')),
        (['-c', '--min-cluster'], dict(dest='min_cluster', type='int')),
        (['-i', '--min-alignment'], dict(dest='min_alignment', type='int')),
        (['--min-identity'], dict(dest='min_IDY', type='float', default=qconfig.min_IDY,
                                  action='callback', callback=check_arg_value, callback_args=(logger,),
                                  callback_kwargs={'min_value': 80.0, 'max_value': 100.0})),
        (['--est-ref-size'], dict(dest='estimated_reference_size', type='int')),
        (['--contig-thresholds'], dict(dest='contig_thresholds')),
        (['--gene-thresholds'], dict(dest='genes_lengths')),
        (['--gage'], dict(dest='with_gage', action='store_true')),
        (['--glimmer'], dict(dest='glimmer', action='callback', callback=set_multiple_variables,
                             callback_kwargs={'store_true_values': ['gene_finding', 'glimmer']},
                             default=False)),
        (['--plots-format'], dict(dest='plot_extension', type='string', action='callback',
                                  callback=check_str_arg_value, callback_args=(logger,),
                                  callback_kwargs={'available_values': qconfig.supported_plot_extensions})),
        (['--svg'], dict(dest='draw_svg', action='store_true')),
        (['--fast'], dict(dest='fast', action='callback', callback=set_multiple_variables,
                          callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
                                           'store_false_values': ['show_snps', 'draw_plots',
                                                                  'html_report', 'create_icarus_html']},
                          default=False)),
        (['--no-gzip'], dict(dest='no_gzip', action='store_true')),
        (['--no-check'], dict(dest='no_check', action='store_true')),
        (['--no-check-meta'], dict(dest='no_check_meta', action='callback', callback=set_multiple_variables,
                                   callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})),
        (['--no-snps'], dict(dest='show_snps', action='store_false')),
        (['--no-plots'], dict(dest='draw_plots', action='store_false')),
        (['--no-html'], dict(dest='html_report', action='callback', callback=set_multiple_variables,
                             callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})),
        (['--no-icarus'], dict(dest='create_icarus_html', action='store_false')),
        (['--no-gc'], dict(dest='no_gc', action='store_true')),
        (['--no-sv'], dict(dest='no_sv', action='store_true')),
        (['--memory-efficient'], dict(dest='memory_efficient', action='store_true')),
        (['--silent'], dict(dest='silent', action='store_true')),
        (['--combined-ref'], dict(dest='is_combined_ref', action='store_true')),
        (['--colors'], dict(dest='used_colors', action='extend')),
        (['--ls'], dict(dest='used_ls', action='extend')),
        (['-j', '--save-json'], dict(dest='save_json', action='store_true')),
        (['-J', '--save-json-to'], dict(dest='json_output_dirpath')),
        (['--err-fpath'], dict(dest='error_log_fpath')),
        (['--read-support'], dict(dest='calculate_read_support', action='store_true'))
    ]
    # Options that exist only in MetaQUAST.
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict(dest='unique_mapping', action='store_true')),
            (['--max-ref-number'], dict(dest='max_references', type='int', action='callback',
                                        callback=check_arg_value, callback_args=(logger,),
                                        callback_kwargs={'default_value': qconfig.max_references,
                                                         'min_value': 0})),
            (['--references-list'], dict(dest='references_txt'))
        ]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    # NOTE(review): `opts` is never read below while qconfig attributes are; presumably
    # QuastOption and the callbacks store parsed values in qconfig -- confirm.
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    # Reject test modes that do not match the tool being run.
    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)

    # Any test mode: use the test output directory and the bundled test data.
    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.with_gage = True
                qconfig.glimmer = True
                qconfig.gene_finding = True
                qconfig.prokaryote = False
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage(meta=is_metaquast)
        sys.exit(2)

    logger.set_up_console_handler(debug=qconfig.debug)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    # Giving an explicit JSON directory implies saving JSON.
    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    # File logging starts only after the output directory is known.
    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice("Output directory already exists. Existing Nucmer alignments can be used")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    # Thresholds arrive as comma-separated strings; the literal "None" means an empty list.
    # Real lists are built here: on Python 3 map() returns a one-shot lazy iterator,
    # which would be empty after its first traversal and has no len().
    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [int(x) for x in qconfig.contig_thresholds.split(",")]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [int(x) for x in qconfig.genes_lengths.split(",")]

    qconfig.set_max_threads(logger)

    # A user-supplied ambiguity score forces ambiguity usage to 'all'.
    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    return quast_py_args, contigs_fpaths