Beispiel #1
0
    def print_params(self, indent='',
                           wrap_after=80, only_if_debug=False):
        """Log the current working directory and a summary of the main parameters.

        The summary is a comma-separated list of ``name: value`` pairs read from
        ``qconfig``. Values are rendered with ``str(value).lower()``. Options
        whose value is exactly ``False`` are left out of the summary.

        Args:
            indent: prefix of the first summary line; continuation lines start
                with ``len(indent)`` spaces plus two more spaces.
            wrap_after: once the current line grows longer than this many
                characters, it is closed with ``', \\`` and a new line is
                started. ``None`` disables wrapping.
            only_if_debug: accepted but not used by this method body.
        """
        self._logger.info("CWD: " + os.getcwd())
        self._logger.info("Main parameters: ")
        options = [('MODE', qconfig.get_mode()),
                   ('threads', qconfig.max_threads), ('eukaryotic', not qconfig.prokaryote),
                   ('split scaffolds', qconfig.split_scaffolds), ('min contig length', qconfig.min_contig),
                   ('min alignment length', qconfig.min_alignment), ('min alignment IDY', qconfig.min_IDY),
                   ('ambiguity', qconfig.ambiguity_usage), ('use all alignments', qconfig.use_all_alignments),
                   ('threshold for extensive misassembly size', qconfig.extensive_misassembly_threshold)]

        # Drop hidden options up front so that "last" below means the last
        # option that is actually printed. Testing the index against the
        # unfiltered list would lose the pending line whenever the final
        # option happens to be False.
        shown = [(option, value) for option, value in options if value is not False]
        last_index = len(shown) - 1
        continuation_prefix = ' ' * len(indent) + '  '

        text = '  '
        line = indent
        for i, (option, value) in enumerate(shown):
            line += option + ': ' + str(value).lower()

            if i == last_index:
                # Final entry: flush the line without a trailing separator.
                text += line
            elif wrap_after is not None and len(line) > wrap_after:
                # Line is full: close it with a backslash continuation.
                text += line + ', \\\n'
                line = continuation_prefix
            else:
                line += ', '

        self._logger.info(text)
Beispiel #2
0
def parse_options(logger, quast_args):
    """Parse the command line and finish setting up the run configuration.

    The function builds an ``OptionParser`` from a table of option
    definitions, parses ``quast_args[1:]``, validates and completes the
    settings held in ``qconfig``, prepares the output directory and the
    logger handlers, and returns the arguments needed by the caller.

    Args:
        logger: logger object; it is passed to the option callbacks, used for
            messages, and gets its file and console handlers set up here.
        quast_args: full argument list. ``quast_args[0]`` is given to
            ``get_mode`` to determine the run mode; the remaining elements
            are parsed as options and positional contig paths.

    Returns:
        A tuple ``(quast_py_args, contigs_fpaths)``: ``quast_args[1:]``
        (passed through ``clean_metaquast_args`` in 'meta' mode) and the
        positional arguments left after option parsing (extended with the
        test contigs when a test option is active).

    Exits:
        Calls ``sys.exit(0)`` after printing help or version, and
        ``sys.exit(2)`` when test data files are missing or no contigs were
        given. ``logger.error`` is called with ``exit_with_code=3`` when
        --glimmer is combined with gene finding.
    """
    mode = get_mode(quast_args[0])
    is_metaquast = True if mode == 'meta' else False
    qconfig.large_genome = True if mode == 'large' else False

    # Help and version requests are answered before any parser is built.
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, mode=mode, short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(mode)
        sys.exit(0)

    # Everything after the first element; this is what gets returned to the caller.
    quast_py_args = quast_args[1:]

    # Table of (option strings, keyword arguments for parser.add_option).
    # NOTE(review): the 'file' type and the 'extend' action are not built into
    # optparse, so they are presumably provided by QuastOption -- confirm there.
    options = [
        (['--debug'], dict(
             dest='debug',
             action='store_true')
         ),
        (['--no-portable-html'], dict(
             dest='portable_html',
             action='store_false')
         ),
        (['--test'], dict(
             dest='test',
             action='store_true')
         ),
        (['--test-sv'], dict(
             dest='test_sv',
             action='store_true')
         ),
        (['--test-no-ref'], dict(
             dest='test_no_ref',
             action='store_true')
         ),
        (['-o', '--output-dir'], dict(
             dest='output_dirpath',
             type='string',
             action='callback',
             callback=check_output_dir,
             callback_args=(logger,))
         ),
        (['-t', '--threads'], dict(
             dest='max_threads',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'default_value': 1, 'min_value': 1})
         ),
        # In 'meta' mode the reference is a string handled by the
        # parse_meta_references callback; otherwise it is a plain 'file' value.
        (['-r', '-R', '--reference'], dict(
             dest='reference',
             type='string' if is_metaquast else 'file',
             action='callback' if is_metaquast else 'store',
             callback_args=(logger,) if is_metaquast else None,
             callback=parse_meta_references if is_metaquast else None)
         ),
        (['-O', '--operons'], dict(
             dest='operons',
             type='file',
             action='extend')
         ),
        # -G and -g share the parse_features callback; -G passes an extra True argument.
        (['-G', '--genes'], dict(
             dest='genes',
             type='string',
             action='callback',
             callback_args=(logger, True),
             callback=parse_features)
         ),
        (['-g', '--features'], dict(
             dest='features',
             type='string',
             action='callback',
             callback_args=(logger,),
             callback=parse_features)
         ),
        # Read files: -1/--reads1 and --pe1 (likewise -2/--pe2, --12/--pe12)
        # write to the same destination.
        (['-1', '--reads1'], dict(
             dest='forward_reads',
             type='file',
             action='extend')
         ),
        (['-2', '--reads2'], dict(
             dest='reverse_reads',
             type='file',
             action='extend')
         ),
        (['--pe1'], dict(
             dest='forward_reads',
             type='file',
             action='extend')
         ),
        (['--pe2'], dict(
             dest='reverse_reads',
             type='file',
             action='extend')
         ),
        (['--mp1'], dict(
             dest='mp_forward_reads',
             type='file',
             action='extend')
         ),
        (['--mp2'], dict(
             dest='mp_reverse_reads',
             type='file',
             action='extend')
         ),
        (['--12'], dict(
             dest='interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--pe12'], dict(
             dest='interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--mp12'], dict(
             dest='mp_interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--single'], dict(
             dest='unpaired_reads',
             type='file',
             action='extend')
         ),
        (['--pacbio'], dict(
             dest='pacbio_reads',
             type='file',
             action='extend')
         ),
        (['--nanopore'], dict(
             dest='nanopore_reads',
             type='file',
             action='extend')
         ),
        (['--ref-sam'], dict(
            dest='reference_sam',
            type='file')
         ),
        (['--ref-bam'], dict(
            dest='reference_bam',
            type='file')
         ),
        (['--sam'], dict(
            dest='sam_fpaths',
            type='string',
            action='callback',
            callback_args=('.sam', logger),
            callback=parse_files_list)
         ),
        (['--bam'], dict(
            dest='bam_fpaths',
            type='string',
            action='callback',
            callback_args=('.bam', logger),
            callback=parse_files_list)
         ),
        (['--sv-bedpe'], dict(
             dest='bed',
             type='file')
         ),
        (['--cov'], dict(
             dest='cov_fpath',
             type='file')
         ),
        (['--phys-cov'], dict(
             dest='phys_cov_fpath',
             type='file')
         ),
        (['-l', '--labels'], dict(
             dest='labels',
             type='string')
         ),
        (['-L'], dict(
             dest='all_labels_from_dirs',
             action='store_true')
         ),
        # Several flags below use set_multiple_variables to switch a group of
        # settings on ('store_true_values') and/or off ('store_false_values').
        (['--mgm'], dict(
             dest='metagenemark',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['gene_finding', 'metagenemark']},
             default=False)
         ),
        (['-s', '--split-scaffolds'], dict(
             dest='split_scaffolds',
             action='store_true')
         ),
        # -e/--eukaryote is expressed as prokaryote=False.
        (['-e', '--eukaryote'], dict(
             dest='prokaryote',
             action='store_false')
         ),
        (['--fungus'], dict(
             dest='is_fungus',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['is_fungus'],
                              'store_false_values': ['prokaryote']})
         ),
        (['--large'], dict(
             dest='large_genome',
             action='store_true')
         ),
        (['-f', '--gene-finding'], dict(
             dest='gene_finding',
             action='store_true')
         ),
        (['--rna-finding'], dict(
             dest='rna_gene_finding',
             action='store_true')
         ),
        (['--fragmented'], dict(
             dest='check_for_fragmented_ref',
             action='store_true')
         ),
        # NOTE(review): the flag says "indent" while its default is
        # MAX_INDEL_LENGTH -- possibly a typo for "indel"; renaming it would
        # change the command-line interface.
        (['--fragmented-max-indent'], dict(
             dest='fragmented_max_indent',
             type='int',
             default=qconfig.MAX_INDEL_LENGTH,
             action='callback',
             callback=set_fragmented_max_indent,
             callback_args=(logger,))
         ),
        (['-a', '--ambiguity-usage'], dict(
             dest='ambiguity_usage',
             type='string',
             default=qconfig.ambiguity_usage,
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': ['none', 'one', 'all']})
         ),
        (['--ambiguity-score'], dict(
             dest='ambiguity_score',
             type='float',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 0.8, 'max_value': 1.0})
         ),
        (['-u', '--use-all-alignments'], dict(
             dest='use_all_alignments',
             action='store_true')
         ),
        (['--strict-NA'], dict(
             dest='strict_NA',
             action='store_true')
         ),
        (['--unaligned-part-size'], dict(
             dest='unaligned_part_size',
             type=int)
         ),
        # Skipping is expressed by storing a threshold of 0.0.
        (['--skip-unaligned-mis-contigs'], dict(
            dest='unaligned_mis_threshold',
            action="store_const",
            const=0.0)
         ),
        (['-x', '--extensive-mis-size'], dict(
             dest='extensive_misassembly_threshold',
             type='int',
             default=qconfig.extensive_misassembly_threshold,
             action='callback',
             callback=set_extensive_mis_size,
             callback_args=(logger,))
         ),
        (['--scaffold-gap-max-size'], dict(
             dest='scaffolds_gap_threshold',
             type=int)
         ),
        (['-m', '--min-contig'], dict(
             dest='min_contig',
             type='int')
         ),
        (['-i', '--min-alignment'], dict(
             dest='min_alignment',
             type='int')
         ),
        (['--min-identity'], dict(
             dest='min_IDY',
             type='float',
             default=qconfig.min_IDY,
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 80.0, 'max_value': 100.0})
         ),
        (['--est-ref-size'], dict(
             dest='estimated_reference_size',
             type='int')
         ),
        # The two threshold options are kept as raw values here and converted
        # to lists of ints after parsing (see below).
        (['--contig-thresholds'], dict(
             dest='contig_thresholds')
         ),
        (['--gene-thresholds'], dict(
             dest='genes_lengths')
         ),
        (['--glimmer'], dict(
             dest='glimmer',
             action='store_true',
             default=False)
         ),
        (['-b', '--conserved-genes-finding'], dict(
             dest='run_busco',
             action='store_true',
             default=False)
         ),
        (['-k', '--k-mer-stats'], dict(
             dest='use_kmc',
             action='store_true',
             default=False)
         ),
        (['--k-mer-size'], dict(
             dest='unique_kmer_len',
             type='int')
         ),
        (['--upper-bound-assembly'], dict(
             dest='optimal_assembly',
             action='store_true')
         ),
        (['--upper-bound-min-con'], dict(
             dest='upperbound_min_connections',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 1})
         ),
        (['--est-insert-size'], dict(
             dest='optimal_assembly_insert_size',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': qconfig.optimal_assembly_min_IS,
                              'max_value': qconfig.optimal_assembly_max_IS})
         ),
        (['--plots-format'], dict(
             dest='plot_extension',
             type='string',
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': qconfig.supported_plot_extensions})
         ),
        (['--use-input-ref-order'], dict(
             dest='use_input_ref_order',
             action='store_true')
         ),
        (['--circos'], dict(
             dest='draw_circos',
             action='store_true')
         ),
        (['--no-read-stats'], dict(
             dest='no_read_stats',
             action='store_true')
         ),
        (['--fast'], dict(
             dest='fast',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip', 'no_read_stats'],
                              'store_false_values': ['show_snps', 'draw_plots', 'html_report', 'create_icarus_html', 'analyze_gaps']},
             default=False)
         ),
        (['--no-gzip'], dict(
             dest='no_gzip',
             action='store_true')
         ),
        (['--no-check'], dict(
             dest='no_check',
             action='store_true')
         ),
        (['--no-check-meta'], dict(
             dest='no_check_meta',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})
         ),
        (['--no-snps'], dict(
             dest='show_snps',
             action='store_false')
         ),
        (['--no-plots'], dict(
             dest='draw_plots',
             action='store_false')
         ),
        (['--no-html'], dict(
             dest='html_report',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})
         ),
        (['--no-icarus'], dict(
             dest='create_icarus_html',
             action='store_false')
         ),
        (['--no-gc'], dict(
             dest='no_gc',
             action='store_true')
         ),
        (['--no-sv'], dict(
             dest='no_sv',
             action='store_true')
         ),
        (['--memory-efficient'], dict(
             dest='memory_efficient',
             action='store_true')
         ),
        (['--space-efficient'], dict(
             dest='space_efficient',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['space_efficient'],
                              'store_false_values': ['show_snps', 'create_icarus_html']},)
         ),
        (['--silent'], dict(
             dest='silent',
             action='store_true')
         ),
        (['--combined-ref'], dict(
             dest='is_combined_ref',
             action='store_true')
         ),
        (['--colors'], dict(
             dest='used_colors',
             action='extend')
         ),
        (['--ls'], dict(
             dest='used_ls',
             action='extend')
         ),
        (['-j', '--save-json'], dict(
             dest='save_json',
             action='store_true')
         ),
        (['-J', '--save-json-to'], dict(
             dest='json_output_dirpath')
         ),
        (['--err-fpath'], dict(
             dest='error_log_fpath')
         ),
        (['--read-support'], dict(
             dest='calculate_read_support',
             action='store_true')
         )
    ]
    # Options that are only registered in 'meta' mode.
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict(
                 dest='unique_mapping',
                 action='store_true')
             ),
            (['--max-ref-number'], dict(
                 dest='max_references',
                 type='int',
                 action='callback',
                 callback=check_arg_value,
                 callback_args=(logger,),
                 callback_kwargs={'default_value': qconfig.max_references, 'min_value': 0})
             ),
            (['--references-list'], dict(
                 dest='references_txt')
             ),
            (['--blast-db'], dict(
                 dest='custom_blast_db_fpath')
             )
        ]

    # NOTE(review): the code below reads the parsed settings from qconfig, not
    # from `opts`, so QuastOption and the callbacks presumably store values on
    # qconfig during parse_args -- confirm in their definitions.
    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    # Leftover positional arguments are the contig file paths.
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    # Each special test option is restricted to one of the two tools.
    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg)

    # --glimmer cannot be combined with gene finding (which --mgm also enables).
    if qconfig.glimmer and qconfig.gene_finding:
        logger.error("You cannot use --glimmer and " + ("--mgm" if qconfig.metagenemark else "--gene-finding") + \
                     " simultaneously!", exit_with_code=3)

    # Test runs: use the dedicated test output directory and the test data
    # referenced by the test_* / meta_test_* names.
    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(qconfig.output_dirpath, 'You are trying to run QUAST from ' + str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
                      'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.features = test_features
                qconfig.operons = test_operons
                qconfig.glimmer = True
                if not qconfig.large_genome:  # special case -- large mode imposes eukaryote gene finding (GeneMark-ES) and our test data is too small for it.
                    qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        # From here on every test variant is treated as a test run.
        qconfig.test = True

        # Missing test data is reported with a hint instead of a file error.
        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n\n'
                'If you still want to run tests, please download and unpack test data to CWD:\n'
                '  wget quast.sf.net/test_data.tar.gz && tar xzf test_data.tar.gz\n')
            sys.exit(2)

    # At least one contigs file is required.
    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n", to_stderr=True)
        qconfig.usage(stream=sys.stderr)
        sys.exit(2)

    if qconfig.large_genome:
        set_large_genome_parameters()

    # Fill in thresholds that are still None, choosing the large-genome or the
    # default constant.
    if qconfig.extensive_misassembly_threshold is None:
        qconfig.extensive_misassembly_threshold = \
            qconfig.LARGE_EXTENSIVE_MIS_THRESHOLD if qconfig.large_genome else qconfig.DEFAULT_EXT_MIS_SIZE
    if qconfig.min_contig is None:
        qconfig.min_contig = qconfig.LARGE_MIN_CONTIG if qconfig.large_genome else qconfig.DEFAULT_MIN_CONTIG
    if qconfig.min_alignment is None:
        qconfig.min_alignment = qconfig.LARGE_MIN_ALIGNMENT if qconfig.large_genome else qconfig.DEFAULT_MIN_ALIGNMENT

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    # Giving a JSON output directory implies saving JSON.
    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    # Without an explicit output path the current working directory is checked.
    if not qconfig.output_dirpath:
        check_dirpath(os.getcwd(), 'An output path was not specified manually. You are trying to run QUAST from ' + str(os.getcwd()) + '.\n' +
                      'Please, specify a different directory using -o option.')
    # Third argument tells set_up_output_dir whether no output path was given;
    # in 'meta' mode None is passed instead of the save_json flag.
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_quast_dir = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    # Logging can only be attached once the output directory is known.
    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_quast_dir:
        logger.notice("Output directory already exists and looks like a QUAST output dir. "
                      "Existing results can be reused (e.g. previously generated alignments)!")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    # The literal string "None" means no thresholds; anything else is split on
    # commas and converted to ints.
    # NOTE(review): this assumes both values are strings at this point -- confirm the qconfig defaults.
    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [int(x) for x in qconfig.contig_thresholds.split(",")]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [int(x) for x in qconfig.genes_lengths.split(",")]

    qconfig.set_max_threads(logger)

    # A user-supplied ambiguity score forces ambiguity usage to 'all'.
    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    if qconfig.sam_fpaths or qconfig.bam_fpaths:
        check_sam_bam_files(contigs_fpaths, qconfig.sam_fpaths, qconfig.bam_fpaths, logger)

    return quast_py_args, contigs_fpaths