Esempio n. 1
0
def main(args):
    check_dirpath(qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '.\n' +
                  'Please, put QUAST in a different directory, then try again.\n', exit_code=3)

    if not args:
        qconfig.usage(meta=True)
        sys.exit(0)

    metaquast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, metaquast_path + args, is_metaquast=True)
    output_dirpath, ref_fpaths, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    html_report = qconfig.html_report
    test_mode = qconfig.test

    # Directories
    output_dirpath, _, _ = qutils.set_up_output_dir(
        output_dirpath, None, not output_dirpath,
        save_json=False)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    qconfig.set_max_threads(logger)
    qutils.logger = logger

    ########################################################################

    from quast_libs import reporting
    try:
        import imp
        imp.reload(reporting)
    except:
        reload(reporting)
    from quast_libs import plotter

    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    if ref_fpaths:
        logger.main_info()
        logger.main_info('Reference(s):')

        corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
            correct_meta_references(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    qconfig.no_check_meta = True
    assemblies, labels = correct_assemblies(contigs_fpaths, output_dirpath, labels)
    if not assemblies:
        logger.error("None of the assembly files contains correct contigs. "
                     "Please, provide different files or decrease --min-contig threshold.")
        return 4

    # Running QUAST(s)
    quast_py_args += ['--meta']
    downloaded_refs = False

    # SEARCHING REFERENCES
    if not ref_fpaths:
        logger.main_info()
        if qconfig.max_references == 0:
            logger.notice("Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled")
        else:
            if qconfig.references_txt:
                logger.main_info("List of references was provided, starting to download reference genomes from NCBI...")
            else:
                logger.main_info("No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
                        "and to download them from NCBI...")
            downloaded_dirpath = os.path.join(output_dirpath, qconfig.downloaded_dirname)
            if not os.path.isdir(downloaded_dirpath):
                os.mkdir(downloaded_dirpath)
            corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
            ref_fpaths = search_references_meta.do(assemblies, labels, downloaded_dirpath, corrected_dirpath, qconfig.references_txt)
            if ref_fpaths:
                search_references_meta.is_quast_first_run = True
                if not qconfig.references_txt:
                    downloaded_refs = True
                logger.main_info()
                logger.main_info('Downloaded reference(s):')
                corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
                    correct_meta_references(ref_fpaths, corrected_dirpath)
            elif test_mode and not ref_fpaths:
                logger.error('Failed to download or setup SILVA 16S rRNA database for working without '
                             'references on metagenome datasets!', to_stderr=True, exit_with_code=4)

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.main_info()
        logger.notice('No references are provided, starting regular QUAST with MetaGeneMark gene finder')
        _start_quast_main(quast_py_args, assemblies=assemblies, output_dirpath=output_dirpath, run_regular_quast=True)
        exit(0)

    # Running combined reference
    combined_output_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name)

    reads_fpaths = []
    if qconfig.forward_reads:
        reads_fpaths.append(qconfig.forward_reads)
    if qconfig.reverse_reads:
        reads_fpaths.append(qconfig.reverse_reads)
    cov_fpath = qconfig.cov_fpath
    physical_cov_fpath = qconfig.phys_cov_fpath
    if (reads_fpaths or qconfig.sam or qconfig.bam) and ref_fpaths:
        bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(combined_ref_fpath, contigs_fpaths, reads_fpaths, corrected_ref_fpaths,
                                                   os.path.join(combined_output_dirpath, qconfig.variation_dirname),
                                                   external_logger=logger, sam_fpath=qconfig.sam, bam_fpath=qconfig.bam, bed_fpath=qconfig.bed)
        qconfig.bed = bed_fpath

    if qconfig.bed:
        quast_py_args += ['--sv-bed']
        quast_py_args += [qconfig.bed]
    if cov_fpath:
        quast_py_args += ['--cov']
        quast_py_args += [cov_fpath]
    if physical_cov_fpath:
        quast_py_args += ['--phys-cov']
        quast_py_args += [physical_cov_fpath]
    if qconfig.sam:
        quast_py_args += ['--sam']
        quast_py_args += [qconfig.sam]
    if qconfig.bam:
        quast_py_args += ['--bam']
        quast_py_args += [qconfig.bam]

    quast_py_args += ['--combined-ref']
    if qconfig.draw_plots or qconfig.html_report:
        if plotter.dict_color_and_ls:
            colors_and_ls = [plotter.dict_color_and_ls[asm.label] for asm in assemblies]
            quast_py_args += ['--colors']
            quast_py_args += [','.join([style[0] for style in colors_and_ls])]
            quast_py_args += ['--ls']
            quast_py_args += [','.join([style[1] for style in colors_and_ls])]
    run_name = 'for the combined reference'
    logger.main_info()
    logger.main_info('Starting quast.py ' + run_name + '...')
    total_num_notices = 0
    total_num_warnings = 0
    total_num_nf_errors = 0
    total_num_notifications = (total_num_notices, total_num_warnings, total_num_nf_errors)
    if qconfig.html_report:
        from quast_libs.html_saver import json_saver
        json_texts = []
    else:
        json_texts = None
    if qconfig.unique_mapping:
        ambiguity_opts = []
    else:
        ambiguity_opts = ["--ambiguity-usage", 'all']
    return_code, total_num_notifications, assemblies, labels = \
        _start_quast_main(quast_py_args + ambiguity_opts,
        assemblies=assemblies,
        reference_fpath=combined_ref_fpath,
        output_dirpath=combined_output_dirpath,
        num_notifications_tuple=total_num_notifications, is_first_run=True)

    if json_texts is not None:
        json_texts.append(json_saver.json_text)
    search_references_meta.is_quast_first_run = False

    genome_info_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name, 'genome_stats')
    genome_info_fpath = os.path.join(genome_info_dirpath, 'genome_info.txt')
    if not os.path.exists(genome_info_fpath):
        logger.main_info('')
        if not downloaded_refs:
            msg = 'Try to restart MetaQUAST with another references.'
        else:
            msg = 'Try to use option --max-ref-number to change maximum number of references (per each assembly) to download.'
        logger.main_info('Failed aligning the contigs for all the references. ' + msg)
        logger.main_info('')
        cleanup(corrected_dirpath)
        logger.main_info('MetaQUAST finished.')
        return logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)

    if downloaded_refs:
        logger.main_info()
        logger.main_info('Excluding downloaded references with low genome fraction from further analysis..')
        corr_ref_fpaths = get_downloaded_refs_with_alignments(genome_info_fpath, ref_fpaths, chromosomes_by_refs)
        if corr_ref_fpaths and corr_ref_fpaths != ref_fpaths:
            logger.main_info()
            logger.main_info('Filtered reference(s):')
            os.remove(combined_ref_fpath)
            contigs_analyzer.ref_labels_by_chromosomes = {}
            corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                correct_meta_references(corr_ref_fpaths, corrected_dirpath)
            run_name = 'for the corrected combined reference'
            logger.main_info()
            logger.main_info('Starting quast.py ' + run_name + '...')
            return_code, total_num_notifications, assemblies, labels = \
                _start_quast_main(quast_py_args + ambiguity_opts,
                assemblies=assemblies,
                reference_fpath=combined_ref_fpath,
                output_dirpath=combined_output_dirpath,
                num_notifications_tuple=total_num_notifications, is_first_run=True)
            if json_texts is not None:
                json_texts = json_texts[:-1]
                json_texts.append(json_saver.json_text)
        elif corr_ref_fpaths == ref_fpaths:
            logger.main_info('All downloaded references have genome fraction more than 10%. Nothing was excluded.')
        else:
            logger.main_info('All downloaded references have low genome fraction. Nothing was excluded for now.')

    if qconfig.calculate_read_support:
        calculate_ave_read_support(combined_output_dirpath, assemblies)

    for arg in args:
        if arg in ('-s', "--scaffolds"):
            quast_py_args.remove(arg)
    quast_py_args += ['--no-check-meta']
    qconfig.contig_thresholds = ','.join([str(threshold) for threshold in qconfig.contig_thresholds if threshold > qconfig.min_contig])
    if not qconfig.contig_thresholds:
        qconfig.contig_thresholds = 'None'
    quast_py_args = remove_from_quast_py_args(quast_py_args, '--contig-thresholds', qconfig.contig_thresholds)
    quast_py_args += ['--contig-thresholds']
    quast_py_args += [qconfig.contig_thresholds]
    quast_py_args.remove('--combined-ref')

    logger.main_info()
    logger.main_info('Partitioning contigs into bins aligned to each reference..')

    assemblies_by_reference, not_aligned_assemblies = partition_contigs(
        assemblies, corrected_ref_fpaths, corrected_dirpath,
        os.path.join(combined_output_dirpath, 'contigs_reports', 'alignments_%s.tsv'), labels)

    ref_names = []
    output_dirpath_per_ref = os.path.join(output_dirpath, qconfig.per_ref_dirname)
    for ref_fpath, ref_assemblies in assemblies_by_reference:
        ref_name = qutils.name_from_fpath(ref_fpath)
        logger.main_info('')
        if not ref_assemblies:
            logger.main_info('No contigs were aligned to the reference ' + ref_name + ', skipping..')
        else:
            ref_names.append(ref_name)
            run_name = 'for the contigs aligned to ' + ref_name
            logger.main_info('Starting quast.py ' + run_name)

            return_code, total_num_notifications = _start_quast_main(quast_py_args,
                assemblies=ref_assemblies,
                reference_fpath=ref_fpath,
                output_dirpath=os.path.join(output_dirpath_per_ref, ref_name),
                num_notifications_tuple=total_num_notifications)
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    # Finally running for the contigs that has not been aligned to any reference
    no_unaligned_contigs = True
    for assembly in not_aligned_assemblies:
        if os.path.isfile(assembly.fpath) and os.stat(assembly.fpath).st_size != 0:
            no_unaligned_contigs = False
            break

    run_name = 'for the contigs not aligned anywhere'
    logger.main_info()
    if no_unaligned_contigs:
        logger.main_info('Skipping quast.py ' + run_name + ' (everything is aligned!)')
    else:
        logger.main_info('Starting quast.py ' + run_name + '...')

        return_code, total_num_notifications = _start_quast_main(quast_py_args,
            assemblies=not_aligned_assemblies,
            output_dirpath=os.path.join(output_dirpath, qconfig.not_aligned_name),
            num_notifications_tuple=total_num_notifications)

        if return_code not in [0, 4]:
            logger.error('Error running quast.py for the contigs not aligned anywhere')
        elif return_code == 4:  # no unaligned contigs, i.e. everything aligned
            no_unaligned_contigs = True
        if not no_unaligned_contigs:
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    if ref_names:
        logger.print_timestamp()
        logger.main_info("Summarizing results...")

        summary_output_dirpath = os.path.join(output_dirpath, qconfig.meta_summary_dir)
        if not os.path.isdir(summary_output_dirpath):
            os.makedirs(summary_output_dirpath)
        if html_report and json_texts:
            from quast_libs.html_saver import html_saver
            html_summary_report_fpath = html_saver.init_meta_report(output_dirpath)
        else:
            html_summary_report_fpath = None
        from quast_libs import create_meta_summary
        metrics_for_plots = reporting.Fields.main_metrics
        misassembl_metrics = [reporting.Fields.MIS_RELOCATION, reporting.Fields.MIS_TRANSLOCATION, reporting.Fields.MIS_INVERTION,
                           reporting.Fields.MIS_ISTRANSLOCATIONS]
        if no_unaligned_contigs:
            full_ref_names = ref_names
        else:
            full_ref_names = ref_names + [qconfig.not_aligned_name]
        create_meta_summary.do(html_summary_report_fpath, summary_output_dirpath, combined_output_dirpath,
                               output_dirpath_per_ref, metrics_for_plots, misassembl_metrics, full_ref_names)
        if html_report and json_texts:
            html_saver.save_colors(output_dirpath, contigs_fpaths, plotter.dict_color_and_ls, meta=True)
            if qconfig.create_icarus_html:
                icarus_html_fpath = html_saver.create_meta_icarus(output_dirpath, ref_names)
                logger.main_info('  Icarus (contig browser) is saved to %s' % icarus_html_fpath)
            html_saver.create_meta_report(output_dirpath, json_texts)

    cleanup(corrected_dirpath)
    logger.main_info('')
    logger.main_info('MetaQUAST finished.')
    return logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)
Esempio n. 2
0
def main(args):
    check_dirpath(qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '.\n' +
                  'Please, put QUAST in a different directory, then try again.\n', exit_code=3)

    if not args:
        qconfig.usage(stream=sys.stderr)
        sys.exit(1)

    metaquast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, metaquast_path + args)
    output_dirpath, ref_fpaths, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    html_report = qconfig.html_report
    test_mode = qconfig.test

    # Directories
    output_dirpath, _, _ = qutils.set_up_output_dir(
        output_dirpath, None, not output_dirpath,
        save_json=False)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    qconfig.set_max_threads(logger)
    qutils.logger = logger

    ########################################################################

    from quast_libs import reporting
    try:
        import importlib
        importlib.reload(reporting)
    except (ImportError, AttributeError):
        reload(reporting)
    from quast_libs import plotter

    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    if ref_fpaths:
        logger.main_info()
        logger.main_info('Reference(s):')

        corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
            correct_meta_references(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    qconfig.no_check_meta = True
    assemblies, labels = correct_assemblies(contigs_fpaths, output_dirpath, labels)
    if not assemblies:
        logger.error("None of the assembly files contains correct contigs. "
                     "Please, provide different files or decrease --min-contig threshold.")
        return 4

    # Running QUAST(s)
    if qconfig.gene_finding:
        quast_py_args += ['--mgm']
    if qconfig.min_IDY is None: # special case: user not specified min-IDY, so we need to use MetaQUAST default value
        quast_py_args += ['--min-identity', str(qconfig.META_MIN_IDY)]

    if qconfig.reuse_combined_alignments:
        reuse_combined_alignments = True
    else:
        reuse_combined_alignments = False

    downloaded_refs = False

    # SEARCHING REFERENCES
    if not ref_fpaths:
        logger.main_info()
        if qconfig.max_references == 0:
            logger.notice("Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled")
        else:
            if qconfig.references_txt:
                logger.main_info("List of references was provided, starting to download reference genomes from NCBI...")
            else:
                logger.main_info("No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
                        "and to download them from NCBI...")
            downloaded_dirpath = os.path.join(output_dirpath, qconfig.downloaded_dirname)
            if not os.path.isdir(downloaded_dirpath):
                os.mkdir(downloaded_dirpath)
            corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
            ref_fpaths = search_references_meta.do(assemblies, labels, downloaded_dirpath, corrected_dirpath, qconfig.references_txt)
            if ref_fpaths:
                search_references_meta.is_quast_first_run = True
                if not qconfig.references_txt:
                    downloaded_refs = True
                logger.main_info()
                logger.main_info('Downloaded reference(s):')
                corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
                    correct_meta_references(ref_fpaths, corrected_dirpath, downloaded_refs=True)
            elif test_mode and not ref_fpaths:
                logger.error('Failed to download or setup SILVA 16S rRNA database for working without '
                             'references on metagenome datasets!', to_stderr=True, exit_with_code=4)

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.main_info()
        logger.notice('No references are provided, starting regular QUAST with MetaGeneMark gene finder')
        assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath)) for fpath in contigs_fpaths]
        _start_quast_main(quast_py_args, assemblies=assemblies, output_dirpath=output_dirpath, run_regular_quast=True)
        exit(0)

    # Running combined reference
    combined_output_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name)
    qconfig.reference = combined_ref_fpath

    if qconfig.bed:
        quast_py_args += ['--sv-bed']
        quast_py_args += [qconfig.bed]

    quast_py_args += ['--combined-ref']
    if qconfig.draw_plots or qconfig.html_report:
        if plotter_data.dict_color_and_ls:
            colors_and_ls = [plotter_data.dict_color_and_ls[asm.label] for asm in assemblies]
            quast_py_args += ['--colors']
            quast_py_args += [','.join([style[0] for style in colors_and_ls])]
            quast_py_args += ['--ls']
            quast_py_args += [','.join([style[1] for style in colors_and_ls])]
    run_name = 'for the combined reference'
    logger.main_info()
    logger.main_info('Starting quast.py ' + run_name + '...')
    total_num_notices = 0
    total_num_warnings = 0
    total_num_nf_errors = 0
    total_num_notifications = (total_num_notices, total_num_warnings, total_num_nf_errors)
    if qconfig.html_report:
        from quast_libs.html_saver import json_saver
        json_texts = []
    else:
        json_texts = None
    if qconfig.unique_mapping:
        ambiguity_opts = []
    else:
        ambiguity_opts = ["--ambiguity-usage", 'all']
    return_code, total_num_notifications = \
        _start_quast_main(quast_py_args + ambiguity_opts,
        labels=labels,
        assemblies=assemblies,
        reference_fpath=combined_ref_fpath,
        output_dirpath=combined_output_dirpath,
        num_notifications_tuple=total_num_notifications,
        is_combined_ref=True)

    if json_texts is not None:
        json_texts.append(json_saver.json_text)
    search_references_meta.is_quast_first_run = False

    genome_info_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name, 'genome_stats')
    genome_info_fpath = os.path.join(genome_info_dirpath, 'genome_info.txt')
    if not os.path.exists(genome_info_fpath):
        logger.main_info('')
        if not downloaded_refs:
            msg = 'Try to restart MetaQUAST with another references.'
        else:
            msg = 'Try to use option --max-ref-number to change maximum number of references (per each assembly) to download.'
        logger.main_info('Failed aligning the contigs for all the references. ' + msg)
        logger.main_info('')
        cleanup(corrected_dirpath)
        logger.main_info('MetaQUAST finished.')
        return logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)

    if downloaded_refs and return_code == 0:
        logger.main_info()
        logger.main_info('Excluding downloaded references with low genome fraction from further analysis..')
        corr_ref_fpaths = get_downloaded_refs_with_alignments(genome_info_fpath, ref_fpaths, chromosomes_by_refs)
        if corr_ref_fpaths and corr_ref_fpaths != ref_fpaths:
            logger.main_info()
            logger.main_info('Filtered reference(s):')
            os.remove(combined_ref_fpath)
            contigs_analyzer.ref_labels_by_chromosomes = OrderedDict()
            corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                correct_meta_references(corr_ref_fpaths, corrected_dirpath)
            assemblies, labels = correct_assemblies(contigs_fpaths, output_dirpath, labels)
            run_name = 'for the corrected combined reference'
            logger.main_info()
            logger.main_info('Starting quast.py ' + run_name + '...')
            return_code, total_num_notifications = \
                _start_quast_main(quast_py_args + ambiguity_opts,
                labels=labels,
                assemblies=assemblies,
                reference_fpath=combined_ref_fpath,
                output_dirpath=combined_output_dirpath,
                num_notifications_tuple=total_num_notifications,
                is_combined_ref=True)
            if json_texts is not None:
                json_texts = json_texts[:-1]
                json_texts.append(json_saver.json_text)
        elif corr_ref_fpaths == ref_fpaths:
            logger.main_info('All downloaded references have genome fraction more than 10%. Nothing was excluded.')
        else:
            logger.main_info('All downloaded references have low genome fraction. Nothing was excluded for now.')

    if return_code != 0:
        logger.main_info('MetaQUAST finished.')
        return logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)

    if qconfig.calculate_read_support:
        calculate_ave_read_support(combined_output_dirpath, assemblies)

    prepare_regular_quast_args(quast_py_args, combined_output_dirpath, reuse_combined_alignments)
    logger.main_info()
    logger.main_info('Partitioning contigs into bins aligned to each reference..')

    assemblies_by_reference, not_aligned_assemblies = partition_contigs(
        assemblies, corrected_ref_fpaths, corrected_dirpath,
        os.path.join(combined_output_dirpath, qconfig.detailed_contigs_reports_dirname, 'alignments_%s.tsv'), labels)

    output_dirpath_per_ref = os.path.join(output_dirpath, qconfig.per_ref_dirname)
    if not qconfig.memory_efficient and \
                    len(assemblies_by_reference) > len(assemblies) and len(assemblies) < qconfig.max_threads:
        logger.main_info()
        logger.main_info('Run QUAST on different references in parallel..')
        threads_per_ref = max(1, qconfig.max_threads // len(assemblies_by_reference))
        quast_py_args += ['--memory-efficient']
        quast_py_args += ['-t', str(threads_per_ref)]

        num_notifications = (0, 0, 0)
        parallel_run_args = [(quast_py_args, output_dirpath_per_ref, ref_fpath, ref_assemblies, num_notifications, True)
                             for ref_fpath, ref_assemblies in assemblies_by_reference]
        ref_names, ref_json_texts, ref_notifications = \
            run_parallel(_run_quast_per_ref, parallel_run_args, qconfig.max_threads, filter_results=True)
        per_ref_num_notifications = list(map(sum, zip(*ref_notifications)))
        total_num_notifications = list(map(sum, zip(total_num_notifications, per_ref_num_notifications)))
        if json_texts is not None:
            json_texts.extend(ref_json_texts)
        quast_py_args.remove('--memory-efficient')
        quast_py_args = remove_from_quast_py_args(quast_py_args, '-t', str(threads_per_ref))
    else:
        ref_names = []
        for ref_fpath, ref_assemblies in assemblies_by_reference:
            ref_name, json_text, total_num_notifications = \
                _run_quast_per_ref(quast_py_args, output_dirpath_per_ref, ref_fpath, ref_assemblies, total_num_notifications)
            if not ref_name:
                continue
            ref_names.append(ref_name)
            if json_texts is not None:
                json_texts.append(json_text)

    # Finally running for the contigs that has not been aligned to any reference
    no_unaligned_contigs = True
    for assembly in not_aligned_assemblies:
        if os.path.isfile(assembly.fpath) and os.stat(assembly.fpath).st_size != 0:
            no_unaligned_contigs = False
            break

    run_name = 'for the contigs not aligned anywhere'
    logger.main_info()
    if no_unaligned_contigs:
        logger.main_info('Skipping quast.py ' + run_name + ' (everything is aligned!)')
    else:
        logger.main_info('Starting quast.py ' + run_name + '... (logging to ' +
                        os.path.join(output_dirpath, qconfig.not_aligned_name, qconfig.LOGGER_DEFAULT_NAME + '.log)'))

        return_code, total_num_notifications = _start_quast_main(quast_py_args + ['-t', str(qconfig.max_threads)],
            assemblies=not_aligned_assemblies,
            output_dirpath=os.path.join(output_dirpath, qconfig.not_aligned_name),
            num_notifications_tuple=total_num_notifications)

        if return_code not in [0, 4]:
            logger.error('Error running quast.py for the contigs not aligned anywhere')
        elif return_code == 4:  # no unaligned contigs, i.e. everything aligned
            no_unaligned_contigs = True
        if not no_unaligned_contigs:
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    if ref_names:
        logger.print_timestamp()
        logger.main_info("Summarizing results...")

        summary_output_dirpath = os.path.join(output_dirpath, qconfig.meta_summary_dir)
        if not os.path.isdir(summary_output_dirpath):
            os.makedirs(summary_output_dirpath)
        if html_report and json_texts:
            from quast_libs.html_saver import html_saver
            html_summary_report_fpath = html_saver.init_meta_report(output_dirpath)
        else:
            html_summary_report_fpath = None
        from quast_libs import create_meta_summary
        metrics_for_plots = reporting.Fields.main_metrics
        misassembly_metrics = [reporting.Fields.MIS_RELOCATION, reporting.Fields.MIS_TRANSLOCATION, reporting.Fields.MIS_INVERTION,
                              reporting.Fields.MIS_ISTRANSLOCATIONS]
        if no_unaligned_contigs:
            full_ref_names = [qutils.name_from_fpath(ref_fpath) for ref_fpath in corrected_ref_fpaths]
        else:
            full_ref_names = [qutils.name_from_fpath(ref_fpath) for ref_fpath in corrected_ref_fpaths] + [qconfig.not_aligned_name]
        create_meta_summary.do(html_summary_report_fpath, summary_output_dirpath, combined_output_dirpath,
                               output_dirpath_per_ref, metrics_for_plots, misassembly_metrics, full_ref_names)
        if html_report and json_texts:
            html_saver.save_colors(output_dirpath, contigs_fpaths, plotter_data.dict_color_and_ls, meta=True)
            if qconfig.create_icarus_html:
                icarus_html_fpath = html_saver.create_meta_icarus(output_dirpath, ref_names)
                logger.main_info('  Icarus (contig browser) is saved to %s' % icarus_html_fpath)
            html_saver.create_meta_report(output_dirpath, json_texts)

    cleanup(corrected_dirpath)
    logger.main_info('')
    logger.main_info('MetaQUAST finished.')
    return logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)
Esempio n. 3
0
def parse_options(logger, quast_args):
    mode = get_mode(quast_args[0])
    is_metaquast = True if mode == 'meta' else False
    qconfig.large_genome = True if mode == 'large' else False

    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, mode=mode, short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(mode)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    options = [
        (['--debug'], dict(
             dest='debug',
             action='store_true')
         ),
        (['--no-portable-html'], dict(
             dest='portable_html',
             action='store_false')
         ),
        (['--test'], dict(
             dest='test',
             action='store_true')
         ),
        (['--test-sv'], dict(
             dest='test_sv',
             action='store_true')
         ),
        (['--test-no-ref'], dict(
             dest='test_no_ref',
             action='store_true')
         ),
        (['-o', '--output-dir'], dict(
             dest='output_dirpath',
             type='string',
             action='callback',
             callback=check_output_dir,
             callback_args=(logger,))
         ),
        (['-t', '--threads'], dict(
             dest='max_threads',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'default_value': 1, 'min_value': 1})
         ),
        (['-r', '-R', '--reference'], dict(
             dest='reference',
             type='string' if is_metaquast else 'file',
             action='callback' if is_metaquast else 'store',
             callback_args=(logger,) if is_metaquast else None,
             callback=parse_meta_references if is_metaquast else None)
         ),
        (['-O', '--operons'], dict(
             dest='operons',
             type='file',
             action='extend')
         ),
        (['-G', '--genes'], dict(
             dest='genes',
             type='string',
             action='callback',
             callback_args=(logger, True),
             callback=parse_features)
         ),
        (['-g', '--features'], dict(
             dest='features',
             type='string',
             action='callback',
             callback_args=(logger,),
             callback=parse_features)
         ),
        (['-1', '--reads1'], dict(
             dest='forward_reads',
             type='file',
             action='extend')
         ),
        (['-2', '--reads2'], dict(
             dest='reverse_reads',
             type='file',
             action='extend')
         ),
        (['--pe1'], dict(
             dest='forward_reads',
             type='file',
             action='extend')
         ),
        (['--pe2'], dict(
             dest='reverse_reads',
             type='file',
             action='extend')
         ),
        (['--mp1'], dict(
             dest='mp_forward_reads',
             type='file',
             action='extend')
         ),
        (['--mp2'], dict(
             dest='mp_reverse_reads',
             type='file',
             action='extend')
         ),
        (['--12'], dict(
             dest='interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--pe12'], dict(
             dest='interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--mp12'], dict(
             dest='mp_interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--single'], dict(
             dest='unpaired_reads',
             type='file',
             action='extend')
         ),
        (['--pacbio'], dict(
             dest='pacbio_reads',
             type='file',
             action='extend')
         ),
        (['--nanopore'], dict(
             dest='nanopore_reads',
             type='file',
             action='extend')
         ),
        (['--ref-sam'], dict(
            dest='reference_sam',
            type='file')
         ),
        (['--ref-bam'], dict(
            dest='reference_bam',
            type='file')
         ),
        (['--sam'], dict(
            dest='sam_fpaths',
            type='string',
            action='callback',
            callback_args=('.sam', logger),
            callback=parse_files_list)
         ),
        (['--bam'], dict(
            dest='bam_fpaths',
            type='string',
            action='callback',
            callback_args=('.bam', logger),
            callback=parse_files_list)
         ),
        (['--sv-bedpe'], dict(
             dest='bed',
             type='file')
         ),
        (['--cov'], dict(
             dest='cov_fpath',
             type='file')
         ),
        (['--phys-cov'], dict(
             dest='phys_cov_fpath',
             type='file')
         ),
        (['-l', '--labels'], dict(
             dest='labels',
             type='string')
         ),
        (['-L'], dict(
             dest='all_labels_from_dirs',
             action='store_true')
         ),
        (['--mgm'], dict(
             dest='metagenemark',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['gene_finding', 'metagenemark']},
             default=False)
         ),
        (['-s', '--split-scaffolds'], dict(
             dest='split_scaffolds',
             action='store_true')
         ),
        (['-e', '--eukaryote'], dict(
             dest='prokaryote',
             action='store_false')
         ),
        (['--fungus'], dict(
             dest='is_fungus',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['is_fungus'],
                              'store_false_values': ['prokaryote']})
         ),
        (['--large'], dict(
             dest='large_genome',
             action='store_true')
         ),
        (['-f', '--gene-finding'], dict(
             dest='gene_finding',
             action='store_true')
         ),
        (['--rna-finding'], dict(
             dest='rna_gene_finding',
             action='store_true')
         ),
        (['--fragmented'], dict(
             dest='check_for_fragmented_ref',
             action='store_true')
         ),
        (['--fragmented-max-indent'], dict(
             dest='fragmented_max_indent',
             type='int',
             default=qconfig.MAX_INDEL_LENGTH,
             action='callback',
             callback=set_fragmented_max_indent,
             callback_args=(logger,))
         ),
        (['-a', '--ambiguity-usage'], dict(
             dest='ambiguity_usage',
             type='string',
             default=qconfig.ambiguity_usage,
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': ['none', 'one', 'all']})
         ),
        (['--ambiguity-score'], dict(
             dest='ambiguity_score',
             type='float',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 0.8, 'max_value': 1.0})
         ),
        (['-u', '--use-all-alignments'], dict(
             dest='use_all_alignments',
             action='store_true')
         ),
        (['--strict-NA'], dict(
             dest='strict_NA',
             action='store_true')
         ),
        (['--unaligned-part-size'], dict(
             dest='unaligned_part_size',
             type=int)
         ),
        (['--skip-unaligned-mis-contigs'], dict(
            dest='unaligned_mis_threshold',
            action="store_const",
            const=0.0)
         ),
        (['-x', '--extensive-mis-size'], dict(
             dest='extensive_misassembly_threshold',
             type='int',
             default=qconfig.extensive_misassembly_threshold,
             action='callback',
             callback=set_extensive_mis_size,
             callback_args=(logger,))
         ),
        (['--scaffold-gap-max-size'], dict(
             dest='scaffolds_gap_threshold',
             type=int)
         ),
        (['-m', '--min-contig'], dict(
             dest='min_contig',
             type='int')
         ),
        (['-i', '--min-alignment'], dict(
             dest='min_alignment',
             type='int')
         ),
        (['--min-identity'], dict(
             dest='min_IDY',
             type='float',
             default=qconfig.min_IDY,
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 80.0, 'max_value': 100.0})
         ),
        (['--est-ref-size'], dict(
             dest='estimated_reference_size',
             type='int')
         ),
        (['--contig-thresholds'], dict(
             dest='contig_thresholds')
         ),
        (['--gene-thresholds'], dict(
             dest='genes_lengths')
         ),
        (['--glimmer'], dict(
             dest='glimmer',
             action='store_true',
             default=False)
         ),
        (['-b', '--conserved-genes-finding'], dict(
             dest='run_busco',
             action='store_true',
             default=False)
         ),
        (['-k', '--k-mer-stats'], dict(
             dest='use_kmc',
             action='store_true',
             default=False)
         ),
        (['--k-mer-size'], dict(
             dest='unique_kmer_len',
             type='int')
         ),
        (['--upper-bound-assembly'], dict(
             dest='optimal_assembly',
             action='store_true')
         ),
        (['--upper-bound-min-con'], dict(
             dest='upperbound_min_connections',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 1})
         ),
        (['--est-insert-size'], dict(
             dest='optimal_assembly_insert_size',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': qconfig.optimal_assembly_min_IS,
                              'max_value': qconfig.optimal_assembly_max_IS})
         ),
        (['--plots-format'], dict(
             dest='plot_extension',
             type='string',
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': qconfig.supported_plot_extensions})
         ),
        (['--use-input-ref-order'], dict(
             dest='use_input_ref_order',
             action='store_true')
         ),
        (['--circos'], dict(
             dest='draw_circos',
             action='store_true')
         ),
        (['--no-read-stats'], dict(
             dest='no_read_stats',
             action='store_true')
         ),
        (['--fast'], dict(
             dest='fast',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip', 'no_read_stats'],
                              'store_false_values': ['show_snps', 'draw_plots', 'html_report', 'create_icarus_html', 'analyze_gaps']},
             default=False)
         ),
        (['--no-gzip'], dict(
             dest='no_gzip',
             action='store_true')
         ),
        (['--no-check'], dict(
             dest='no_check',
             action='store_true')
         ),
        (['--no-check-meta'], dict(
             dest='no_check_meta',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})
         ),
        (['--no-snps'], dict(
             dest='show_snps',
             action='store_false')
         ),
        (['--no-plots'], dict(
             dest='draw_plots',
             action='store_false')
         ),
        (['--no-html'], dict(
             dest='html_report',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})
         ),
        (['--no-icarus'], dict(
             dest='create_icarus_html',
             action='store_false')
         ),
        (['--no-gc'], dict(
             dest='no_gc',
             action='store_true')
         ),
        (['--no-sv'], dict(
             dest='no_sv',
             action='store_true')
         ),
        (['--memory-efficient'], dict(
             dest='memory_efficient',
             action='store_true')
         ),
        (['--space-efficient'], dict(
             dest='space_efficient',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['space_efficient'],
                              'store_false_values': ['show_snps', 'create_icarus_html']},)
         ),
        (['--silent'], dict(
             dest='silent',
             action='store_true')
         ),
        (['--combined-ref'], dict(
             dest='is_combined_ref',
             action='store_true')
         ),
        (['--colors'], dict(
             dest='used_colors',
             action='extend')
         ),
        (['--ls'], dict(
             dest='used_ls',
             action='extend')
         ),
        (['-j', '--save-json'], dict(
             dest='save_json',
             action='store_true')
         ),
        (['-J', '--save-json-to'], dict(
             dest='json_output_dirpath')
         ),
        (['--err-fpath'], dict(
             dest='error_log_fpath')
         ),
        (['--read-support'], dict(
             dest='calculate_read_support',
             action='store_true')
         )
    ]
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict(
                 dest='unique_mapping',
                 action='store_true')
             ),
            (['--max-ref-number'], dict(
                 dest='max_references',
                 type='int',
                 action='callback',
                 callback=check_arg_value,
                 callback_args=(logger,),
                 callback_kwargs={'default_value': qconfig.max_references, 'min_value': 0})
             ),
            (['--references-list'], dict(
                 dest='references_txt')
             ),
            (['--blast-db'], dict(
                 dest='custom_blast_db_fpath')
             )
        ]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg)

    if qconfig.glimmer and qconfig.gene_finding:
        logger.error("You cannot use --glimmer and " + ("--mgm" if qconfig.metagenemark else "--gene-finding") + \
                     " simultaneously!", exit_with_code=3)

    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(qconfig.output_dirpath, 'You are trying to run QUAST from ' + str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
                      'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.features = test_features
                qconfig.operons = test_operons
                qconfig.glimmer = True
                if not qconfig.large_genome:  # special case -- large mode imposes eukaryote gene finding (GeneMark-ES) and our test data is too small for it.
                    qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n\n'
                'If you still want to run tests, please download and unpack test data to CWD:\n'
                '  wget quast.sf.net/test_data.tar.gz && tar xzf test_data.tar.gz\n')
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n", to_stderr=True)
        qconfig.usage(stream=sys.stderr)
        sys.exit(2)

    if qconfig.large_genome:
        set_large_genome_parameters()

    if qconfig.extensive_misassembly_threshold is None:
        qconfig.extensive_misassembly_threshold = \
            qconfig.LARGE_EXTENSIVE_MIS_THRESHOLD if qconfig.large_genome else qconfig.DEFAULT_EXT_MIS_SIZE
    if qconfig.min_contig is None:
        qconfig.min_contig = qconfig.LARGE_MIN_CONTIG if qconfig.large_genome else qconfig.DEFAULT_MIN_CONTIG
    if qconfig.min_alignment is None:
        qconfig.min_alignment = qconfig.LARGE_MIN_ALIGNMENT if qconfig.large_genome else qconfig.DEFAULT_MIN_ALIGNMENT

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    if not qconfig.output_dirpath:
        check_dirpath(os.getcwd(), 'An output path was not specified manually. You are trying to run QUAST from ' + str(os.getcwd()) + '.\n' +
                      'Please, specify a different directory using -o option.')
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_quast_dir = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_quast_dir:
        logger.notice("Output directory already exists and looks like a QUAST output dir. "
                      "Existing results can be reused (e.g. previously generated alignments)!")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [int(x) for x in qconfig.contig_thresholds.split(",")]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [int(x) for x in qconfig.genes_lengths.split(",")]

    qconfig.set_max_threads(logger)

    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    if qconfig.sam_fpaths or qconfig.bam_fpaths:
        check_sam_bam_files(contigs_fpaths, qconfig.sam_fpaths, qconfig.bam_fpaths, logger)

    return quast_py_args, contigs_fpaths
Esempio n. 4
0
def parse_options(logger, quast_args, is_metaquast=False):
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args,
                      meta=is_metaquast,
                      short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    options = [
        (['--debug'], dict(dest='debug', action='store_true')),
        (['--no-portable-html'],
         dict(dest='portable_html', action='store_false')),
        (['--test'], dict(dest='test', action='store_true')),
        (['--test-sv'], dict(dest='test_sv', action='store_true')),
        (['--test-no-ref'], dict(dest='test_no_ref', action='store_true')),
        (['-o', '--output-dir'],
         dict(dest='output_dirpath',
              type='string',
              action='callback',
              callback=check_output_dir,
              callback_args=(logger, ))),
        (['-t', '--threads'],
         dict(dest='max_threads',
              type='int',
              action='callback',
              callback=check_arg_value,
              callback_args=(logger, ),
              callback_kwargs={
                  'default_value': 1,
                  'min_value': 1
              })),
        (['-R', '--reference'],
         dict(dest='reference',
              type='string' if is_metaquast else 'file',
              action='callback' if is_metaquast else 'store',
              callback_args=(logger, ) if is_metaquast else None,
              callback=parse_meta_references if is_metaquast else None)),
        (['-G', '--genes'], dict(dest='genes', type='file', action='extend')),
        (['-O', '--operons'], dict(dest='operons',
                                   type='file',
                                   action='extend')),
        (['-1', '--reads1'], dict(dest='forward_reads', type='file')),
        (['-2', '--reads2'], dict(dest='reverse_reads', type='file')),
        (['--sam'], dict(dest='sam', type='file')),
        (['--bam'], dict(dest='bam', type='file')),
        (['--sv-bedpe'], dict(dest='bed', type='file')),
        (['--cov'], dict(dest='cov_fpath', type='file')),
        (['--phys-cov'], dict(dest='phys_cov_fpath', type='file')),
        (['-l', '--labels'], dict(dest='labels', type='string')),
        (['-L'], dict(dest='all_labels_from_dirs', action='store_true')),
        (['--mgm'],
         dict(dest='metagenemark',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_true_values': ['gene_finding', 'metagenemark']
              },
              default=False)),
        (['-s', '--scaffolds'], dict(dest='scaffolds', action='store_true')),
        (['-e', '--eukaryote'], dict(dest='prokaryote', action='store_false')),
        (['-f',
          '--gene-finding'], dict(dest='gene_finding', action='store_true')),
        (['--fragmented'],
         dict(dest='check_for_fragmented_ref', action='store_true')),
        (['--fragmented-max-indent'],
         dict(dest='fragmented_max_indent',
              type='int',
              default=qconfig.MAX_INDEL_LENGTH,
              action='callback',
              callback=set_fragmented_max_indent,
              callback_args=(logger, ))),
        (['-a', '--ambiguity-usage'],
         dict(dest='ambiguity_usage',
              type='string',
              default=qconfig.ambiguity_usage,
              action='callback',
              callback=check_str_arg_value,
              callback_args=(logger, ),
              callback_kwargs={'available_values': ['none', 'one', 'all']})),
        (['--ambiguity-score'],
         dict(dest='ambiguity_score',
              type='float',
              action='callback',
              callback=check_arg_value,
              callback_args=(logger, ),
              callback_kwargs={
                  'min_value': 0.8,
                  'max_value': 1.0
              })),
        (['-u', '--use-all-alignments'],
         dict(dest='use_all_alignments', action='store_true')),
        (['--strict-NA'], dict(dest='strict_NA', action='store_true')),
        (['--unaligned-part-size'], dict(dest='unaligned_part_size',
                                         type=int)),
        (['-x', '--extensive-mis-size'],
         dict(dest='extensive_misassembly_threshold',
              type='int',
              default=qconfig.extensive_misassembly_threshold,
              action='callback',
              callback=set_extensive_mis_size,
              callback_args=(logger, ))),
        (['--scaffold-gap-max-size'],
         dict(dest='scaffolds_gap_threshold', type=int)),
        (['-m', '--min-contig'], dict(dest='min_contig', type='int')),
        (['-c', '--min-cluster'], dict(dest='min_cluster', type='int')),
        (['-i', '--min-alignment'], dict(dest='min_alignment', type='int')),
        (['--min-identity'],
         dict(dest='min_IDY',
              type='float',
              default=qconfig.min_IDY,
              action='callback',
              callback=check_arg_value,
              callback_args=(logger, ),
              callback_kwargs={
                  'min_value': 80.0,
                  'max_value': 100.0
              })),
        (['--est-ref-size'], dict(dest='estimated_reference_size',
                                  type='int')),
        (['--contig-thresholds'], dict(dest='contig_thresholds')),
        (['--gene-thresholds'], dict(dest='genes_lengths')),
        (['--gage'], dict(dest='with_gage', action='store_true')),
        (['--glimmer'],
         dict(dest='glimmer',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_true_values': ['gene_finding', 'glimmer']
              },
              default=False)),
        (['--plots-format'],
         dict(dest='plot_extension',
              type='string',
              action='callback',
              callback=check_str_arg_value,
              callback_args=(logger, ),
              callback_kwargs={
                  'available_values': qconfig.supported_plot_extensions
              })),
        (['--use-input-ref-order'],
         dict(dest='use_input_ref_order', action='store_true')),
        (['--svg'], dict(dest='draw_svg', action='store_true')),
        (['--fast'],
         dict(dest='fast',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
                  'store_false_values': [
                      'show_snps', 'draw_plots', 'html_report',
                      'create_icarus_html'
                  ]
              },
              default=False)),
        (['--no-gzip'], dict(dest='no_gzip', action='store_true')),
        (['--no-check'], dict(dest='no_check', action='store_true')),
        (['--no-check-meta'],
         dict(dest='no_check_meta',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_true_values': ['no_check', 'no_check_meta']
              })), (['--no-snps'], dict(dest='show_snps',
                                        action='store_false')),
        (['--no-plots'], dict(dest='draw_plots', action='store_false')),
        (['--no-html'],
         dict(dest='html_report',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_false_values': ['html_report', 'create_icarus_html']
              })),
        (['--no-icarus'], dict(dest='create_icarus_html',
                               action='store_false')),
        (['--no-gc'], dict(dest='no_gc', action='store_true')),
        (['--no-sv'], dict(dest='no_sv', action='store_true')),
        (['--memory-efficient'],
         dict(dest='memory_efficient', action='store_true')),
        (['--space-efficient'],
         dict(
             dest='space_efficient',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={
                 'store_true_values': ['space_efficient'],
                 'store_false_values': ['create_icarus_html']
             },
         )),
        (['--force-nucmer'], dict(dest='force_nucmer', action='store_true')),
        (['--silent'], dict(dest='silent', action='store_true')),
        (['--combined-ref'], dict(dest='is_combined_ref',
                                  action='store_true')),
        (['--colors'], dict(dest='used_colors', action='extend')),
        (['--ls'], dict(dest='used_ls', action='extend')),
        (['-j', '--save-json'], dict(dest='save_json', action='store_true')),
        (['-J', '--save-json-to'], dict(dest='json_output_dirpath')),
        (['--err-fpath'], dict(dest='error_log_fpath')),
        (['--read-support'],
         dict(dest='calculate_read_support', action='store_true'))
    ]
    if is_metaquast:
        options += [(['--unique-mapping'],
                     dict(dest='unique_mapping', action='store_true')),
                    (['--max-ref-number'],
                     dict(dest='max_references',
                          type='int',
                          action='callback',
                          callback=check_arg_value,
                          callback_args=(logger, ),
                          callback_kwargs={
                              'default_value': qconfig.max_references,
                              'min_value': 0
                          })),
                    (['--references-list'], dict(dest='references_txt')),
                    (['--blast-db'], dict(dest='custom_blast_db_fpath'))]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)

    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(
            qconfig.output_dirpath, 'You are trying to run QUAST from ' +
            str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
            'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.glimmer = True
                qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n\n'
                'If you still want to run tests, please download and unpack test data to CWD:\n'
                '  wget quast.sf.net/test_data.tar.gz && tar xzf test_data.tar.gz\n'
            )
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n",
                     to_stderr=True)
        qconfig.usage(meta=is_metaquast, stream=sys.stderr)
        sys.exit(2)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    if not qconfig.output_dirpath:
        check_dirpath(
            os.getcwd(),
            'An output path was not specified manually. You are trying to run QUAST from '
            + str(os.getcwd()) + '.\n' +
            'Please, specify a different directory using -o option.')
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice(
            "Output directory already exists. Existing Nucmer alignments can be used"
        )
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels,
                                           qconfig.all_labels_from_dirs)

    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [
            int(x) for x in qconfig.contig_thresholds.split(",")
        ]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [
            int(x) for x in qconfig.genes_lengths.split(",")
        ]

    qconfig.set_max_threads(logger)

    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice(
                "--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified"
            )

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    return quast_py_args, contigs_fpaths
Esempio n. 5
0
def main(args):
    if ' ' in qconfig.QUAST_HOME:
        logger.error(
            'QUAST does not support spaces in paths. \n'
            'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
            'Please, put QUAST in a different directory, then try again.\n',
            to_stderr=True,
            exit_with_code=3)

    if not args:
        qconfig.usage(meta=True)
        sys.exit(0)

    metaquast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger,
                                                  metaquast_path + args,
                                                  is_metaquast=True)
    output_dirpath, ref_fpaths, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    html_report = qconfig.html_report
    test_mode = qconfig.test

    # Directories
    output_dirpath, _, _ = qutils.set_up_output_dir(output_dirpath,
                                                    None,
                                                    not output_dirpath,
                                                    save_json=False)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    qconfig.set_max_threads(logger)
    qutils.logger = logger

    ########################################################################

    from quast_libs import reporting
    reload(reporting)
    from quast_libs import plotter

    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    if ref_fpaths:
        logger.main_info()
        logger.main_info('Reference(s):')

        corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
            correct_meta_references(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    qconfig.no_check_meta = True
    assemblies, labels = correct_assemblies(contigs_fpaths, output_dirpath,
                                            labels)
    if not assemblies:
        logger.error(
            "None of the assembly files contains correct contigs. "
            "Please, provide different files or decrease --min-contig threshold."
        )
        return 4

    # Running QUAST(s)
    quast_py_args += ['--meta']
    downloaded_refs = False

    # SEARCHING REFERENCES
    if not ref_fpaths:
        logger.main_info()
        if qconfig.max_references == 0:
            logger.notice(
                "Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled"
            )
        else:
            if qconfig.references_txt:
                logger.main_info(
                    "List of references was provided, starting to download reference genomes from NCBI..."
                )
            else:
                logger.main_info(
                    "No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
                    "and to download them from NCBI...")
            downloaded_dirpath = os.path.join(output_dirpath,
                                              qconfig.downloaded_dirname)
            if not os.path.isdir(downloaded_dirpath):
                os.mkdir(downloaded_dirpath)
            ref_fpaths = search_references_meta.do(assemblies, labels,
                                                   downloaded_dirpath,
                                                   qconfig.references_txt)
            if ref_fpaths:
                search_references_meta.is_quast_first_run = True
                if not qconfig.references_txt:
                    downloaded_refs = True
                logger.main_info()
                logger.main_info('Downloaded reference(s):')
                corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
                    correct_meta_references(ref_fpaths, corrected_dirpath)
            elif test_mode and not ref_fpaths:
                logger.error(
                    'Failed to download or setup SILVA 16S rRNA database for working without '
                    'references on metagenome datasets!',
                    to_stderr=True,
                    exit_with_code=4)

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.main_info()
        logger.notice(
            'No references are provided, starting regular QUAST with MetaGeneMark gene finder'
        )
        _start_quast_main(quast_py_args,
                          assemblies=assemblies,
                          output_dirpath=output_dirpath)
        exit(0)

    # Running combined reference
    combined_output_dirpath = os.path.join(output_dirpath,
                                           qconfig.combined_output_name)

    reads_fpaths = []
    if qconfig.forward_reads:
        reads_fpaths.append(qconfig.forward_reads)
    if qconfig.reverse_reads:
        reads_fpaths.append(qconfig.reverse_reads)
    if (reads_fpaths or qconfig.sam or qconfig.bam) and ref_fpaths:
        bed_fpath, cov_fpath, _ = reads_analyzer.do(
            combined_ref_fpath,
            contigs_fpaths,
            reads_fpaths,
            corrected_ref_fpaths,
            os.path.join(combined_output_dirpath, qconfig.variation_dirname),
            external_logger=logger,
            sam_fpath=qconfig.sam,
            bam_fpath=qconfig.bam,
            bed_fpath=qconfig.bed)
        qconfig.bed = bed_fpath

    if qconfig.bed:
        quast_py_args += ['--sv-bed']
        quast_py_args += [qconfig.bed]
    if qconfig.sam:
        quast_py_args += ['--sam']
        quast_py_args += [qconfig.sam]
    if qconfig.bam:
        quast_py_args += ['--bam']
        quast_py_args += [qconfig.bam]

    quast_py_args += ['--combined-ref']
    if qconfig.draw_plots or qconfig.html_report:
        if plotter.dict_color_and_ls:
            colors_and_ls = [
                plotter.dict_color_and_ls[asm.label] for asm in assemblies
            ]
            quast_py_args += ['--colors']
            quast_py_args += [','.join([style[0] for style in colors_and_ls])]
            quast_py_args += ['--ls']
            quast_py_args += [','.join([style[1] for style in colors_and_ls])]
    run_name = 'for the combined reference'
    logger.main_info()
    logger.main_info('Starting quast.py ' + run_name + '...')
    total_num_notices = 0
    total_num_warnings = 0
    total_num_nf_errors = 0
    total_num_notifications = (total_num_notices, total_num_warnings,
                               total_num_nf_errors)
    if qconfig.html_report:
        from quast_libs.html_saver import json_saver
        json_texts = []
    else:
        json_texts = None
    return_code, total_num_notifications, assemblies, labels = \
        _start_quast_main(quast_py_args + ([] if qconfig.unique_mapping else ["--ambiguity-usage", 'all']),
        assemblies=assemblies,
        reference_fpath=combined_ref_fpath,
        output_dirpath=combined_output_dirpath,
        num_notifications_tuple=total_num_notifications, is_first_run=True)

    if json_texts is not None:
        json_texts.append(json_saver.json_text)
    search_references_meta.is_quast_first_run = False

    genome_info_dirpath = os.path.join(output_dirpath,
                                       qconfig.combined_output_name,
                                       'genome_stats')
    genome_info_fpath = os.path.join(genome_info_dirpath, 'genome_info.txt')
    if not os.path.exists(genome_info_fpath):
        logger.main_info('')
        logger.main_info(
            'Failed aligning the contigs for all the references. ' +
            ('Try to restart MetaQUAST with another references.'
             if not downloaded_refs else
             'Try to use option --max-ref-number to change maximum number of references '
             '(per each assembly) to download.'))
        logger.main_info('')
        cleanup(corrected_dirpath)
        logger.main_info('MetaQUAST finished.')
        logger.finish_up(numbers=tuple(total_num_notifications),
                         check_test=test_mode)
        return

    if downloaded_refs:
        logger.main_info()
        logger.main_info(
            'Excluding downloaded references with low genome fraction from further analysis..'
        )
        corr_ref_fpaths = get_downloaded_refs_with_alignments(
            genome_info_fpath, ref_fpaths, chromosomes_by_refs)
        if corr_ref_fpaths and corr_ref_fpaths != ref_fpaths:
            logger.main_info()
            logger.main_info('Filtered reference(s):')
            os.remove(combined_ref_fpath)
            contigs_analyzer.ref_labels_by_chromosomes = {}
            corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                correct_meta_references(corr_ref_fpaths, corrected_dirpath)
            run_name = 'for the corrected combined reference'
            logger.main_info()
            logger.main_info('Starting quast.py ' + run_name + '...')
            return_code, total_num_notifications, assemblies, labels = \
                _start_quast_main(quast_py_args + ([] if qconfig.unique_mapping else ["--ambiguity-usage", 'all']),
                assemblies=assemblies,
                reference_fpath=combined_ref_fpath,
                output_dirpath=combined_output_dirpath,
                num_notifications_tuple=total_num_notifications, is_first_run=True)
            if json_texts is not None:
                json_texts = json_texts[:-1]
                json_texts.append(json_saver.json_text)
        elif corr_ref_fpaths == ref_fpaths:
            logger.main_info(
                'All downloaded references have genome fraction more than 10%. Nothing was excluded.'
            )
        else:
            logger.main_info(
                'All downloaded references have low genome fraction. Nothing was excluded for now.'
            )

    if qconfig.calculate_read_support:
        calculate_ave_read_support(combined_output_dirpath, assemblies)

    for arg in args:
        if arg in ('-s', "--scaffolds"):
            quast_py_args.remove(arg)
    quast_py_args += ['--no-check-meta']
    qconfig.contig_thresholds = ','.join([
        str(threshold) for threshold in qconfig.contig_thresholds
        if threshold > qconfig.min_contig
    ])
    if not qconfig.contig_thresholds:
        qconfig.contig_thresholds = 'None'
    quast_py_args = remove_from_quast_py_args(quast_py_args,
                                              '--contig-thresholds',
                                              qconfig.contig_thresholds)
    quast_py_args += ['--contig-thresholds']
    quast_py_args += [qconfig.contig_thresholds]
    quast_py_args.remove('--combined-ref')

    logger.main_info()
    logger.main_info(
        'Partitioning contigs into bins aligned to each reference..')

    assemblies_by_reference, not_aligned_assemblies = partition_contigs(
        assemblies, corrected_ref_fpaths, corrected_dirpath,
        os.path.join(combined_output_dirpath, 'contigs_reports',
                     'alignments_%s.tsv'), labels)

    ref_names = []
    output_dirpath_per_ref = os.path.join(output_dirpath,
                                          qconfig.per_ref_dirname)
    for ref_fpath, ref_assemblies in assemblies_by_reference:
        ref_name = qutils.name_from_fpath(ref_fpath)
        logger.main_info('')
        if not ref_assemblies:
            logger.main_info('No contigs were aligned to the reference ' +
                             ref_name + ', skipping..')
        else:
            ref_names.append(ref_name)
            run_name = 'for the contigs aligned to ' + ref_name
            logger.main_info('Starting quast.py ' + run_name)

            return_code, total_num_notifications = _start_quast_main(
                quast_py_args,
                assemblies=ref_assemblies,
                reference_fpath=ref_fpath,
                output_dirpath=os.path.join(output_dirpath_per_ref, ref_name),
                num_notifications_tuple=total_num_notifications)
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    # Finally running for the contigs that has not been aligned to any reference
    no_unaligned_contigs = True
    for assembly in not_aligned_assemblies:
        if os.path.isfile(
                assembly.fpath) and os.stat(assembly.fpath).st_size != 0:
            no_unaligned_contigs = False
            break

    run_name = 'for the contigs not aligned anywhere'
    logger.main_info()
    if no_unaligned_contigs:
        logger.main_info('Skipping quast.py ' + run_name +
                         ' (everything is aligned!)')
    else:
        logger.main_info('Starting quast.py ' + run_name + '...')

        return_code, total_num_notifications = _start_quast_main(
            quast_py_args,
            assemblies=not_aligned_assemblies,
            output_dirpath=os.path.join(output_dirpath,
                                        qconfig.not_aligned_name),
            num_notifications_tuple=total_num_notifications)

        if return_code not in [0, 4]:
            logger.error(
                'Error running quast.py for the contigs not aligned anywhere')
        elif return_code == 4:  # no unaligned contigs, i.e. everything aligned
            no_unaligned_contigs = True
        if not no_unaligned_contigs:
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    if ref_names:
        logger.print_timestamp()
        logger.main_info("Summarizing results...")

        summary_output_dirpath = os.path.join(output_dirpath,
                                              qconfig.meta_summary_dir)
        if not os.path.isdir(summary_output_dirpath):
            os.makedirs(summary_output_dirpath)
        if html_report and json_texts:
            from quast_libs.html_saver import html_saver
            html_summary_report_fpath = html_saver.init_meta_report(
                output_dirpath)
        else:
            html_summary_report_fpath = None
        from quast_libs import create_meta_summary
        metrics_for_plots = reporting.Fields.main_metrics
        misassembl_metrics = [
            reporting.Fields.MIS_RELOCATION,
            reporting.Fields.MIS_TRANSLOCATION, reporting.Fields.MIS_INVERTION,
            reporting.Fields.MIS_ISTRANSLOCATIONS
        ]
        create_meta_summary.do(
            html_summary_report_fpath, summary_output_dirpath,
            combined_output_dirpath, output_dirpath_per_ref, metrics_for_plots,
            misassembl_metrics,
            ref_names if no_unaligned_contigs else ref_names +
            [qconfig.not_aligned_name])
        if html_report and json_texts:
            html_saver.save_colors(output_dirpath,
                                   contigs_fpaths,
                                   plotter.dict_color_and_ls,
                                   meta=True)
            if qconfig.create_icarus_html:
                icarus_html_fpath = html_saver.create_meta_icarus(
                    output_dirpath, ref_names)
                logger.main_info('  Icarus (contig browser) is saved to %s' %
                                 icarus_html_fpath)
            html_saver.create_meta_report(output_dirpath, json_texts)

    cleanup(corrected_dirpath)
    logger.main_info('')
    logger.main_info('MetaQUAST finished.')
    return logger.finish_up(numbers=tuple(total_num_notifications),
                            check_test=test_mode)
Esempio n. 6
0
def parse_options(logger, quast_args, is_metaquast=False):
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, meta=is_metaquast, short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    options = [
        (['--debug'], dict(
             dest='debug',
             action='store_true')
         ),
        (['--no-portable-html'], dict(
             dest='portable_html',
             action='store_false')
         ),
        (['--test'], dict(
             dest='test',
             action='store_true')
         ),
        (['--test-sv'], dict(
             dest='test_sv',
             action='store_true')
         ),
        (['--test-no-ref'], dict(
             dest='test_no_ref',
             action='store_true')
         ),
        (['-o', '--output-dir'], dict(
             dest='output_dirpath',
             type='string',
             action='callback',
             callback=check_output_dir,
             callback_args=(logger,))
         ),
        (['-t', '--threads'], dict(
             dest='max_threads',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'default_value': 1, 'min_value': 1})
         ),
        (['-R', '--reference'], dict(
             dest='reference',
             type='string' if is_metaquast else 'file',
             action='callback' if is_metaquast else 'store',
             callback_args=(logger,) if is_metaquast else None,
             callback=parse_meta_references if is_metaquast else None)
         ),
        (['-G', '--genes'], dict(
             dest='genes',
             type='file',
             action='extend')
         ),
        (['-O', '--operons'], dict(
             dest='operons',
             type='file',
             action='extend')
         ),
        (['-1', '--reads1'], dict(
             dest='forward_reads',
             type='file',
             action='extend')
         ),
        (['-2', '--reads2'], dict(
             dest='reverse_reads',
             type='file',
             action='extend')
         ),
        (['--pe1'], dict(
             dest='forward_reads',
             type='file',
             action='extend')
         ),
        (['--pe2'], dict(
             dest='reverse_reads',
             type='file',
             action='extend')
         ),
        (['--mp1'], dict(
             dest='mp_forward_reads',
             type='file',
             action='extend')
         ),
        (['--mp2'], dict(
             dest='mp_reverse_reads',
             type='file',
             action='extend')
         ),
        (['--12'], dict(
             dest='interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--pe12'], dict(
             dest='interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--mp12'], dict(
             dest='mp_interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--single'], dict(
             dest='unpaired_reads',
             type='file',
             action='extend')
         ),
        (['--ref-sam'], dict(
            dest='reference_sam',
            type='file')
         ),
        (['--ref-bam'], dict(
            dest='reference_bam',
            type='file')
         ),
        (['--sam'], dict(
            dest='sam_fpaths',
            type='string',
            action='callback',
            callback_args=('.sam', logger),
            callback=parse_files_list)
         ),
        (['--bam'], dict(
            dest='bam_fpaths',
            type='string',
            action='callback',
            callback_args=('.bam', logger),
            callback=parse_files_list)
         ),
        (['--sv-bedpe'], dict(
             dest='bed',
             type='file')
         ),
        (['--cov'], dict(
             dest='cov_fpath',
             type='file')
         ),
        (['--phys-cov'], dict(
             dest='phys_cov_fpath',
             type='file')
         ),
        (['-l', '--labels'], dict(
             dest='labels',
             type='string')
         ),
        (['-L'], dict(
             dest='all_labels_from_dirs',
             action='store_true')
         ),
        (['--mgm'], dict(
             dest='metagenemark',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['gene_finding', 'metagenemark']},
             default=False)
         ),
        (['-s', '--scaffolds'], dict(
             dest='scaffolds',
             action='store_true')
         ),
        (['-e', '--eukaryote'], dict(
             dest='prokaryote',
             action='store_false')
         ),
        (['--fungus'], dict(
             dest='is_fungus',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['is_fungus'],
                              'store_false_values': ['prokaryote']})
         ),
        (['--large'], dict(
             dest='large_genome',
             action='store_true')
         ),
        (['-f', '--gene-finding'], dict(
             dest='gene_finding',
             action='store_true')
         ),
        (['--rna-finding'], dict(
             dest='rna_gene_finding',
             action='store_true')
         ),
        (['--fragmented'], dict(
             dest='check_for_fragmented_ref',
             action='store_true')
         ),
        (['--fragmented-max-indent'], dict(
             dest='fragmented_max_indent',
             type='int',
             default=qconfig.MAX_INDEL_LENGTH,
             action='callback',
             callback=set_fragmented_max_indent,
             callback_args=(logger,))
         ),
        (['-a', '--ambiguity-usage'], dict(
             dest='ambiguity_usage',
             type='string',
             default=qconfig.ambiguity_usage,
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': ['none', 'one', 'all']})
         ),
        (['--ambiguity-score'], dict(
             dest='ambiguity_score',
             type='float',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 0.8, 'max_value': 1.0})
         ),
        (['-u', '--use-all-alignments'], dict(
             dest='use_all_alignments',
             action='store_true')
         ),
        (['--strict-NA'], dict(
             dest='strict_NA',
             action='store_true')
         ),
        (['--unaligned-part-size'], dict(
             dest='unaligned_part_size',
             type=int)
         ),
        (['-x', '--extensive-mis-size'], dict(
             dest='extensive_misassembly_threshold',
             type='int',
             default=qconfig.extensive_misassembly_threshold,
             action='callback',
             callback=set_extensive_mis_size,
             callback_args=(logger,))
         ),
        (['--scaffold-gap-max-size'], dict(
             dest='scaffolds_gap_threshold',
             type=int)
         ),
        (['-m', '--min-contig'], dict(
             dest='min_contig',
             type='int')
         ),
        (['-c', '--min-cluster'], dict(
             dest='min_cluster',
             type='int')
         ),
        (['-i', '--min-alignment'], dict(
             dest='min_alignment',
             type='int')
         ),
        (['--min-identity'], dict(
             dest='min_IDY',
             type='float',
             default=qconfig.min_IDY,
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 80.0, 'max_value': 100.0})
         ),
        (['--est-ref-size'], dict(
             dest='estimated_reference_size',
             type='int')
         ),
        (['--contig-thresholds'], dict(
             dest='contig_thresholds')
         ),
        (['--gene-thresholds'], dict(
             dest='genes_lengths')
         ),
        (['--glimmer'], dict(
             dest='glimmer',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['gene_finding', 'glimmer']},
             default=False)
         ),
        (['-b', '--find-conserved-genes'], dict(
             dest='run_busco',
             action='store_true',
             default=False)
         ),
        (['--ideal_assembly'], dict(
             dest='ideal_assembly',
             action='store_true')
         ),
        (['--est-insert-size'], dict(
             dest='ideal_assembly_insert_size',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': qconfig.ideal_assembly_min_IS,
                              'max_value': qconfig.ideal_assembly_max_IS})
         ),
        (['--plots-format'], dict(
             dest='plot_extension',
             type='string',
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': qconfig.supported_plot_extensions})
         ),
        (['--use-input-ref-order'], dict(
             dest='use_input_ref_order',
             action='store_true')
         ),
        (['--svg'], dict(
             dest='draw_svg',
             action='store_true')
         ),
        (['--fast'], dict(
             dest='fast',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
                              'store_false_values': ['show_snps', 'draw_plots', 'html_report', 'create_icarus_html']},
             default=False)
         ),
        (['--no-gzip'], dict(
             dest='no_gzip',
             action='store_true')
         ),
        (['--no-check'], dict(
             dest='no_check',
             action='store_true')
         ),
        (['--no-check-meta'], dict(
             dest='no_check_meta',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})
         ),
        (['--no-snps'], dict(
             dest='show_snps',
             action='store_false')
         ),
        (['--no-plots'], dict(
             dest='draw_plots',
             action='store_false')
         ),
        (['--no-html'], dict(
             dest='html_report',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})
         ),
        (['--no-icarus'], dict(
             dest='create_icarus_html',
             action='store_false')
         ),
        (['--no-gc'], dict(
             dest='no_gc',
             action='store_true')
         ),
        (['--no-sv'], dict(
             dest='no_sv',
             action='store_true')
         ),
        (['--memory-efficient'], dict(
             dest='memory_efficient',
             action='store_true')
         ),
        (['--space-efficient'], dict(
             dest='space_efficient',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['space_efficient'],
                              'store_false_values': ['create_icarus_html']},)
         ),
        (['--force-nucmer'], dict(
             dest='force_nucmer',
             action='store_true')
         ),
        (['--silent'], dict(
             dest='silent',
             action='store_true')
         ),
        (['--combined-ref'], dict(
             dest='is_combined_ref',
             action='store_true')
         ),
        (['--colors'], dict(
             dest='used_colors',
             action='extend')
         ),
        (['--ls'], dict(
             dest='used_ls',
             action='extend')
         ),
        (['-j', '--save-json'], dict(
             dest='save_json',
             action='store_true')
         ),
        (['-J', '--save-json-to'], dict(
             dest='json_output_dirpath')
         ),
        (['--err-fpath'], dict(
             dest='error_log_fpath')
         ),
        (['--read-support'], dict(
             dest='calculate_read_support',
             action='store_true')
         )
    ]
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict(
                 dest='unique_mapping',
                 action='store_true')
             ),
            (['--max-ref-number'], dict(
                 dest='max_references',
                 type='int',
                 action='callback',
                 callback=check_arg_value,
                 callback_args=(logger,),
                 callback_kwargs={'default_value': qconfig.max_references, 'min_value': 0})
             ),
            (['--references-list'], dict(
                 dest='references_txt')
             ),
            (['--blast-db'], dict(
                 dest='custom_blast_db_fpath')
             )
        ]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)

    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(qconfig.output_dirpath, 'You are trying to run QUAST from ' + str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
                  'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.glimmer = True
                qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True
        
        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n'
                'If you still want to run tests, please download test_data directory from \n'
                'https://github.com/ablab/quast/ to CWD, or install QUAST from source:\n'
                'git clone https://github.com/ablab/quast && cd quast && ./setup.py install\n')
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n", to_stderr=True)
        qconfig.usage(meta=is_metaquast, stream=sys.stderr)
        sys.exit(2)

    if qconfig.large_genome:
        set_large_genome_parameters()

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    if not qconfig.output_dirpath:
        check_dirpath(os.getcwd(), 'An output path was not specified manually. You are trying to run QUAST from ' + str(os.getcwd()) + '.\n' +
                  'Please, specify a different directory using -o option.')
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice("Output directory already exists. Existing Nucmer alignments can be used")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [int(x) for x in qconfig.contig_thresholds.split(",")]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [int(x) for x in qconfig.genes_lengths.split(",")]

    qconfig.set_max_threads(logger)

    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    if qconfig.sam_fpaths or qconfig.bam_fpaths:
        check_sam_bam_files(contigs_fpaths, qconfig.sam_fpaths, qconfig.bam_fpaths, logger)

    return quast_py_args, contigs_fpaths
Esempio n. 7
0
def parse_options(logger, quast_args, is_metaquast=False):
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, meta=is_metaquast, short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    options = [
        (['--debug'], dict(
             dest='debug',
             action='store_true')
         ),
        (['--test'], dict(
             dest='test',
             action='store_true')
         ),
        (['--test-sv'], dict(
             dest='test_sv',
             action='store_true')
         ),
        (['--test-no-ref'], dict(
             dest='test_no_ref',
             action='store_true')
         ),
        (['-o', '--output-dir'], dict(
             dest='output_dirpath',
             type='string',
             action='callback',
             callback=check_output_dir,
             callback_args=(logger,))
         ),
        (['-t', '--threads'], dict(
             dest='max_threads',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'default_value': 1, 'min_value': 1})
         ),
        (['-R', '--reference'], dict(
             dest='reference',
             type='string' if is_metaquast else 'file',
             action='callback' if is_metaquast else 'store',
             callback_args=(logger,) if is_metaquast else None,
             callback=parse_meta_references if is_metaquast else None)
         ),
        (['-G', '--genes'], dict(
             dest='genes',
             type='file',
             action='extend')
         ),
        (['-O', '--operons'], dict(
             dest='operons',
             type='file',
             action='extend')
         ),
        (['-1', '--reads1'], dict(
             dest='forward_reads',
             type='file')
         ),
        (['-2', '--reads2'], dict(
             dest='reverse_reads',
             type='file')
         ),
        (['--sam'], dict(
             dest='sam',
             type='file')
         ),
        (['--bam'], dict(
             dest='bam',
             type='file')
         ),
        (['--sv-bedpe'], dict(
             dest='bed',
             type='file')
         ),
        (['-l', '--labels'], dict(
             dest='labels',
             type='string')
         ),
        (['-L'], dict(
             dest='all_labels_from_dirs',
             action='store_true')
         ),
        (['--meta'], dict(
             dest='meta',
             action='store_true')
         ),
        (['-s', '--scaffolds'], dict(
             dest='scaffolds',
             action='store_true')
         ),
        (['-e', '--eukaryote'], dict(
             dest='prokaryote',
             action='store_false')
         ),
        (['-f', '--gene-finding'], dict(
             dest='gene_finding',
             action='store_true')
         ),
        (['--fragmented'], dict(
             dest='check_for_fragmented_ref',
             action='store_true')
         ),
        (['-a', '--ambiguity-usage'], dict(
             dest='ambiguity_usage',
             type='string',
             default=qconfig.ambiguity_usage,
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': ['none', 'one', 'all']})
         ),
        (['--ambiguity-score'], dict(
             dest='ambiguity_score',
             type='float',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 0.8, 'max_value': 1.0})
         ),
        (['-u', '--use-all-alignments'], dict(
             dest='use_all_alignments',
             action='store_true')
         ),
        (['--strict-NA'], dict(
             dest='strict_NA',
             action='store_true')
         ),
        (['--significant-part-size'], dict(
             dest='significant_part_size',
             type=int)
         ),
        (['-x', '--extensive-mis-size'], dict(
             dest='extensive_misassembly_threshold',
             type='int',
             default=qconfig.extensive_misassembly_threshold,
             action='callback',
             callback=set_extensive_mis_size,
             callback_args=(logger,))
         ),
        (['-m', '--min-contig'], dict(
             dest='min_contig',
             type='int')
         ),
        (['-c', '--min-cluster'], dict(
             dest='min_cluster',
             type='int')
         ),
        (['-i', '--min-alignment'], dict(
             dest='min_alignment',
             type='int')
         ),
        (['--min-identity'], dict(
             dest='min_IDY',
             type='float',
             default=qconfig.min_IDY,
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 80.0, 'max_value': 100.0})
         ),
        (['--est-ref-size'], dict(
             dest='estimated_reference_size',
             type='int')
         ),
        (['--contig-thresholds'], dict(
             dest='contig_thresholds')
         ),
        (['--gene-thresholds'], dict(
             dest='genes_lengths')
         ),
        (['--gage'], dict(
             dest='with_gage',
             action='store_true')
         ),
        (['--glimmer'], dict(
             dest='glimmer',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['gene_finding', 'glimmer']},
             default=False)
         ),
        (['--plots-format'], dict(
             dest='plot_extension',
             type='string',
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': qconfig.supported_plot_extensions})
         ),
        (['--svg'], dict(
             dest='draw_svg',
             action='store_true')
         ),
        (['--fast'], dict(
             dest='fast',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
                              'store_false_values': ['show_snps', 'draw_plots', 'html_report', 'create_icarus_html']},
             default=False)
         ),
        (['--no-gzip'], dict(
             dest='no_gzip',
             action='store_true')
         ),
        (['--no-check'], dict(
             dest='no_check',
             action='store_true')
         ),
        (['--no-check-meta'], dict(
             dest='no_check_meta',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})
         ),
        (['--no-snps'], dict(
             dest='show_snps',
             action='store_false')
         ),
        (['--no-plots'], dict(
             dest='draw_plots',
             action='store_false')
         ),
        (['--no-html'], dict(
             dest='html_report',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})
         ),
        (['--no-icarus'], dict(
             dest='create_icarus_html',
             action='store_false')
         ),
        (['--no-gc'], dict(
             dest='no_gc',
             action='store_true')
         ),
        (['--no-sv'], dict(
             dest='no_sv',
             action='store_true')
         ),
        (['--memory-efficient'], dict(
             dest='memory_efficient',
             action='store_true')
         ),
        (['--silent'], dict(
             dest='silent',
             action='store_true')
         ),
        (['--combined-ref'], dict(
             dest='is_combined_ref',
             action='store_true')
         ),
        (['--colors'], dict(
             dest='used_colors',
             action='extend')
         ),
        (['--ls'], dict(
             dest='used_ls',
             action='extend')
         ),
        (['-j', '--save-json'], dict(
             dest='save_json',
             action='store_true')
         ),
        (['-J', '--save-json-to'], dict(
             dest='json_output_dirpath')
         ),
        (['--err-fpath'], dict(
             dest='error_log_fpath')
         ),
        (['--read-support'], dict(
             dest='calculate_read_support',
             action='store_true')
         )
    ]
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict(
                 dest='unique_mapping',
                 action='store_true')
             ),
            (['--max-ref-number'], dict(
                 dest='max_references',
                 type='int',
                 action='callback',
                 callback=check_arg_value,
                 callback_args=(logger,),
                 callback_kwargs={'default_value': qconfig.max_references, 'min_value': 0})
             ),
            (['--references-list'], dict(
                 dest='references_txt')
             )
        ]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)

    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.with_gage = True
                qconfig.glimmer = True
                qconfig.gene_finding = True
                qconfig.prokaryote = False
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage(meta=is_metaquast)
        sys.exit(2)

    logger.set_up_console_handler(debug=qconfig.debug)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice("Output directory already exists. Existing Nucmer alignments can be used")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = map(int, qconfig.contig_thresholds.split(","))
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = map(int, qconfig.genes_lengths.split(","))

    qconfig.set_max_threads(logger)

    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    return quast_py_args, contigs_fpaths