Beispiel #1
0
def parallel_blast(contigs_fpath, label, blast_res_fpath, err_fpath, blast_check_fpath, blast_threads):
    cmd = blast_fpath('blastn') + (' -query %s -db %s -outfmt 7 -num_threads %s' % (
            contigs_fpath, db_fpath, blast_threads))
    res_fpath = blast_res_fpath + '_' + label
    check_fpath = blast_check_fpath + '_' + label
    logger.info('  ' + 'processing ' + label)
    qutils.call_subprocess(shlex.split(cmd), stdout=open(res_fpath, 'w'), stderr=open(err_fpath, 'a'), logger=logger)
    logger.info('  ' + 'BLAST results for %s are saved to %s...' % (label, res_fpath))
    with open(check_fpath, 'w') as check_file:
        check_file.writelines('Assembly: %s size: %d\n' % (contigs_fpath, os.path.getsize(contigs_fpath)))
    return
Beispiel #2
0
def download_blastdb():
    if os.path.isfile(db_fpath + '.nsq'):
        logger.info()
        logger.info('SILVA 16S rRNA database has already been downloaded, unpacked and BLAST database created. '
                    'If not, please remove %s and rerun MetaQUAST' % db_fpath + '.nsq')
        return 0
    log_fpath = os.path.join(blastdb_dirpath, 'blastdb.log')
    db_gz_fpath = os.path.join(blastdb_dirpath, silva_fname + '.gz')
    silva_fpath = os.path.join(blastdb_dirpath, silva_fname)

    logger.info()
    if os.path.isfile(db_gz_fpath):
        logger.info('SILVA 16S ribosomal RNA gene database has already been downloaded.')
    else:
        logger.info('Downloading SILVA 16S ribosomal RNA gene database...')
        if not os.path.isdir(blastdb_dirpath):
            os.mkdir(blastdb_dirpath)
        silva_download = urllib.URLopener()
        silva_remote_fpath = silva_db_path + silva_fname + '.gz'
        try:
            silva_download.retrieve(silva_remote_fpath, db_gz_fpath + '.download', show_progress)
        except Exception:
            logger.error(
                'Failed downloading SILVA 16S rRNA gene database (%s)! The search for reference genomes cannot be performed. '
                'Try to download it manually in %s and restart MetaQUAST.' % (silva_remote_fpath, blastdb_dirpath))
            return 1
        shutil.move(db_gz_fpath + '.download', db_gz_fpath)

    logger.info('Processing downloaded file. Logging to %s...' % log_fpath)
    if not os.path.isfile(silva_fpath):
        logger.info('Unpacking and replacing " " with "_"...')

        unpacked_fpath = silva_fpath + ".unpacked"
        cmd = "gunzip -c %s" % db_gz_fpath
        qutils.call_subprocess(shlex.split(cmd), stdout=open(unpacked_fpath, 'w'), stderr=open(log_fpath, 'a'), logger=logger)

        in_file = open(unpacked_fpath).read()
        out_file = open(unpacked_fpath, 'w')
        out_file.write(in_file.replace(' ', '_'))
        out_file.close()
        shutil.move(unpacked_fpath, silva_fpath)

    logger.info('Making BLAST database...')
    cmd = blast_fpath('makeblastdb') + (' -in %s -dbtype nucl -out %s' % (silva_fpath, db_fpath))
    qutils.call_subprocess(shlex.split(cmd), stdout=open(log_fpath, 'a'), stderr=open(log_fpath, 'a'), logger=logger)
    if not os.path.exists(db_fpath + '.nsq') or os.path.getsize(db_fpath + '.nsq') < db_nsq_fsize:
        logger.error('Failed to make BLAST database ("' + blastdb_dirpath +
                     '"). See details in log. Try to make it manually: %s' % cmd)
        return 1
    elif not qconfig.debug:
        os.remove(db_gz_fpath)
        os.remove(silva_fpath)
    return 0
Beispiel #3
0
def gmhmm_p_everyGC(tool_dirpath, fasta_fpath, err_fpath, index, tmp_dirpath,
                    num_threads):
    tmp_dirpath = tempfile.mkdtemp(dir=tmp_dirpath)

    tool_exec_fpath = os.path.join(tool_dirpath, 'gmsn.pl')
    err_file = open(err_fpath, 'w')
    fasta_name = qutils.name_from_fpath(fasta_fpath)
    return_code = qutils.call_subprocess([
        'perl', tool_exec_fpath, '--name', fasta_name, '--clean', '--out',
        tmp_dirpath, fasta_fpath
    ],
                                         stdout=err_file,
                                         stderr=err_file,
                                         indent='    ' +
                                         qutils.index_to_str(index))
    if return_code != 0:
        return

    genes = []
    tool_exec_fpath = os.path.join(tool_dirpath, 'gmhmmp')
    sub_fasta_fpath = os.path.join(tmp_dirpath, fasta_name)
    out_fpath = sub_fasta_fpath + '.gmhmm'
    heu_fpath = os.path.join(tmp_dirpath, fasta_name + '_hmm_heuristic.mod')
    with open(err_fpath, 'a') as err_file:
        ok = gmhmm_p(tool_exec_fpath, fasta_fpath, heu_fpath, out_fpath,
                     err_file, index)
        if ok:
            genes.extend(parse_gmhmm_out(out_fpath))

    if not qconfig.debug:
        shutil.rmtree(tmp_dirpath)

    return genes
Beispiel #4
0
def gmhmm_p_everyGC(tool_dirpath, fasta_fpath, err_fpath, index, tmp_dirpath, num_threads):
    tmp_dirpath = tempfile.mkdtemp(dir=tmp_dirpath)

    tool_exec_fpath = os.path.join(tool_dirpath, 'gmsn.pl')
    err_file = open(err_fpath, 'w')
    fasta_name = qutils.name_from_fpath(fasta_fpath)
    return_code = qutils.call_subprocess(
        ['perl', tool_exec_fpath, '--name', fasta_name, '--clean', '--out', tmp_dirpath,
         fasta_fpath],
        stdout=err_file,
        stderr=err_file,
        indent='    ' + qutils.index_to_str(index))
    if return_code != 0:
        return

    genes = []
    tool_exec_fpath = os.path.join(tool_dirpath, 'gmhmmp')
    sub_fasta_fpath = os.path.join(tmp_dirpath, fasta_name)
    out_fpath = sub_fasta_fpath + '.gmhmm'
    heu_fpath = os.path.join(tmp_dirpath, fasta_name + '_hmm_heuristic.mod')
    with open(err_fpath, 'a') as err_file:
        ok = gmhmm_p(tool_exec_fpath, fasta_fpath, heu_fpath,
                   out_fpath, err_file, index)
        if ok:
            genes.extend(parse_gmhmm_out(out_fpath))

    if not qconfig.debug:
        shutil.rmtree(tmp_dirpath)

    return genes
Beispiel #5
0
def run_gage(i, contigs_fpath, gage_results_dirpath, gage_tool_path, reference, tmp_dir):
    assembly_name = qutils.name_from_fpath(contigs_fpath)
    assembly_label = qutils.label_from_fpath(contigs_fpath)

    logger.info('  ' + qutils.index_to_str(i) + assembly_label + '...')

    # run gage tool
    log_out_fpath = os.path.join(gage_results_dirpath, 'gage_' + assembly_name + '.stdout')
    log_err_fpath = os.path.join(gage_results_dirpath, 'gage_' + assembly_name + '.stderr')
    logger.info('  ' + qutils.index_to_str(i) + 'Logging to files ' +
                os.path.basename(log_out_fpath) + ' and ' +
                os.path.basename(log_err_fpath) + '...')
    log_out_f = open(log_out_fpath, 'w')
    log_err_f = open(log_err_fpath, 'w')

    return_code = qutils.call_subprocess(
        ['sh', gage_tool_path, reference, contigs_fpath, tmp_dir, str(qconfig.min_contig)],
        stdout=log_out_f,
        stderr=log_err_f,
        indent='  ' + qutils.index_to_str(i),
        only_if_debug=False)
    if return_code != 0:
        logger.info('  ' + qutils.index_to_str(i) + 'Failed.')
    else:
        logger.info('  ' + qutils.index_to_str(i) + 'Done.')

    log_out_f.close()
    log_err_f.close()

    return return_code
Beispiel #6
0
def run_gage(i, contigs_fpath, gage_results_dirpath, gage_tool_path, reference, tmp_dir):
    assembly_label = qutils.label_from_fpath_for_fname(contigs_fpath)

    logger.info('  ' + qutils.index_to_str(i) + assembly_label + '...')

    # run gage tool
    log_out_fpath = os.path.join(gage_results_dirpath, 'gage_' + assembly_label + '.stdout')
    log_err_fpath = os.path.join(gage_results_dirpath, 'gage_' + assembly_label + '.stderr')
    logger.info('  ' + qutils.index_to_str(i) + 'Logging to files ' +
                os.path.basename(log_out_fpath) + ' and ' +
                os.path.basename(log_err_fpath) + '...')
    log_out_f = open(log_out_fpath, 'w')
    log_err_f = open(log_err_fpath, 'w')

    return_code = qutils.call_subprocess(
        ['sh', gage_tool_path, reference, contigs_fpath, tmp_dir, str(qconfig.min_contig)],
        stdout=log_out_f,
        stderr=log_err_f,
        indent='  ' + qutils.index_to_str(i),
        only_if_debug=False)
    if return_code != 0:
        logger.info('  ' + qutils.index_to_str(i) + 'Failed.')
    else:
        logger.info('  ' + qutils.index_to_str(i) + 'Done.')

    log_out_f.close()
    log_err_f.close()

    return return_code
Beispiel #7
0
 def run(contig_path, tmp_path):
     with open(err_path, 'a') as err_file:
         return_code = qutils.call_subprocess(
             [tool_exec, contig_path, '-d', trained_dir, '-g', '-o', tmp_path],
             stdout=err_file,
             stderr=err_file,
             indent='  ' + qutils.index_to_str(index) + '  ')
         return return_code
Beispiel #8
0
def parallel_blast(contigs_fpath, label, blast_res_fpath, err_fpath,
                   blast_check_fpath, blast_threads):
    cmd = blast_fpath('blastn') + (
        ' -query %s -db %s -outfmt 7 -num_threads %s' %
        (contigs_fpath, db_fpath, blast_threads))
    res_fpath = blast_res_fpath + '_' + label
    check_fpath = blast_check_fpath + '_' + label
    logger.info('  ' + 'processing ' + label)
    qutils.call_subprocess(shlex.split(cmd),
                           stdout=open(res_fpath, 'w'),
                           stderr=open(err_fpath, 'a'),
                           logger=logger)
    logger.info('  ' + 'BLAST results for %s are saved to %s...' %
                (label, res_fpath))
    with open(check_fpath, 'w') as check_file:
        check_file.writelines('Assembly: %s size: %d\n' %
                              (contigs_fpath, os.path.getsize(contigs_fpath)))
    return
Beispiel #9
0
def do(contigs_fpaths, gene_lengths, out_dirpath):
    logger.print_timestamp()
    logger.main_info('Running GlimmerHMM...')

    tool_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')
    tool_src_dirpath = os.path.join(tool_dirpath, 'src')
    tool_exec_fpath = os.path.join(tool_dirpath, 'glimmerhmm')
    tmp_dirpath = os.path.join(out_dirpath, 'tmp')

    if not os.path.isfile(tool_exec_fpath):
        # making
        logger.main_info("Compiling GlimmerHMM...")
        return_code = qutils.call_subprocess(
            ['make', '-C', tool_src_dirpath],
            stdout=open(os.path.join(tool_src_dirpath, 'make.log'), 'w'),
            stderr=open(os.path.join(tool_src_dirpath, 'make.err'), 'w'),
            indent='    ')
        if return_code != 0 or not os.path.isfile(tool_exec_fpath):
            logger.error(
                "Failed to compile GlimmerHMM (" + tool_src_dirpath +
                ")!\nTry to compile it manually or do not use --gene-finding "
                "option with --eukaryote.\nUse --debug option to see the command lines."
            )
            return

    if not os.path.isdir(out_dirpath):
        os.makedirs(out_dirpath)
    if not os.path.isdir(tmp_dirpath):
        os.makedirs(tmp_dirpath)

    n_jobs = min(len(contigs_fpaths), qconfig.max_threads)
    from joblib import Parallel, delayed
    results = Parallel(n_jobs=n_jobs)(
        delayed(predict_genes)(index, contigs_fpath, gene_lengths, out_dirpath,
                               tool_dirpath, tmp_dirpath)
        for index, contigs_fpath in enumerate(contigs_fpaths))

    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        unique, cnt = results[i]
        if unique is not None:
            report.add_field(reporting.Fields.PREDICTED_GENES_UNIQUE, unique)
        if cnt is not None:
            report.add_field(reporting.Fields.PREDICTED_GENES, cnt)
        if unique is None and cnt is None:
            logger.error(
                'Glimmer failed running Glimmer for %s. ' +
                ('Run with the --debug option'
                 ' to see the command line.' if not qconfig.debug else '') %
                qutils.label_from_fpath(contigs_fpath))

    if not qconfig.debug:
        shutil.rmtree(tmp_dirpath)

    logger.main_info('Done.')
Beispiel #10
0
 def run(contig_path, tmp_path):
     with open(err_path, 'a') as err_file:
         return_code = qutils.call_subprocess([
             tool_exec, contig_path, '-d', trained_dir, '-g', '-o', tmp_path
         ],
                                              stdout=err_file,
                                              stderr=err_file,
                                              indent='  ' +
                                              qutils.index_to_str(index) +
                                              '  ')
         return return_code
Beispiel #11
0
def gmhmm_p(tool_exec, fasta_fpath, heu_fpath, out_fpath, err_file, index):
    """ Run GeneMark.hmm with this heuristic model (heu_dirpath)
        prompt> gmhmmp -m heu_11_45.mod sequence
        prompt> gm -m heu_11_45.mat sequence"""
    return_code = qutils.call_subprocess(
        [tool_exec, '-d', '-p', '0', '-m', heu_fpath, '-o', out_fpath, fasta_fpath],
        stdout=err_file,
        stderr=err_file,
        indent='    ' + qutils.index_to_str(index))

    return return_code == 0 and os.path.isfile(out_fpath)
Beispiel #12
0
def gmhmm_p(tool_exec, fasta_fpath, heu_fpath, out_fpath, err_file, index):
    """ Run GeneMark.hmm with this heuristic model (heu_dirpath)
        prompt> gmhmmp -m heu_11_45.mod sequence
        prompt> gm -m heu_11_45.mat sequence"""
    return_code = qutils.call_subprocess(
        [tool_exec, '-d', '-p', '0', '-m', heu_fpath, '-o', out_fpath, fasta_fpath],
        stdout=err_file,
        stderr=err_file,
        indent='    ' + qutils.index_to_str(index))

    return return_code == 0 and os.path.isfile(out_fpath)
Beispiel #13
0
def do(contigs_fpaths, gene_lengths, out_dirpath):
    logger.print_timestamp()
    logger.main_info('Running GlimmerHMM...')

    tool_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')
    tool_src_dirpath = os.path.join(tool_dirpath, 'src')
    tool_exec_fpath = os.path.join(tool_dirpath, 'glimmerhmm')
    tmp_dirpath = os.path.join(out_dirpath, 'tmp')

    if not os.path.isfile(tool_exec_fpath):
        # making
        logger.main_info("Compiling GlimmerHMM...")
        return_code = qutils.call_subprocess(
            ['make', '-C', tool_src_dirpath],
            stdout=open(os.path.join(tool_src_dirpath, 'make.log'), 'w'),
            stderr=open(os.path.join(tool_src_dirpath, 'make.err'), 'w'),
            indent='    ')
        if return_code != 0 or not os.path.isfile(tool_exec_fpath):
            logger.error("Failed to compile GlimmerHMM (" + tool_src_dirpath +
                         ")!\nTry to compile it manually or do not use --gene-finding "
                         "option with --eukaryote.\nUse --debug option to see the command lines.")
            return

    if not os.path.isdir(out_dirpath):
        os.makedirs(out_dirpath)
    if not os.path.isdir(tmp_dirpath):
        os.makedirs(tmp_dirpath)

    n_jobs = min(len(contigs_fpaths), qconfig.max_threads)
    from joblib import Parallel, delayed
    results = Parallel(n_jobs=n_jobs)(delayed(predict_genes)(
        index, contigs_fpath, gene_lengths, out_dirpath, tool_dirpath, tmp_dirpath)
        for index, contigs_fpath in enumerate(contigs_fpaths))

    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        unique, cnt = results[i]
        if unique is not None:
            report.add_field(reporting.Fields.PREDICTED_GENES_UNIQUE, unique)
        if cnt is not None:
            report.add_field(reporting.Fields.PREDICTED_GENES, cnt)
        if unique is None and cnt is None:
            logger.error(
                'Glimmer failed running Glimmer for %s. ' + ('Run with the --debug option'
                ' to see the command line.' if not qconfig.debug else '') % qutils.label_from_fpath(contigs_fpath))

    if not qconfig.debug:
        shutil.rmtree(tmp_dirpath)

    logger.main_info('Done.')
Beispiel #14
0
def gm_es(tool_dirpath, fasta_fpath, err_fpath, index, tmp_dirpath, num_threads):
    tool_exec_fpath = os.path.join(tool_dirpath, 'gmes_petap.pl')
    libs_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'genemark-es', 'lib')
    err_file = open(err_fpath, 'w')
    tmp_dirpath += qutils.name_from_fpath(fasta_fpath)
    if not os.path.isdir(tmp_dirpath):
        os.mkdir(tmp_dirpath)
    return_code = qutils.call_subprocess(
        ['perl', '-I', libs_dirpath, tool_exec_fpath, '--ES', '--cores', str(num_threads), '--sequence', fasta_fpath,
         '--out', tmp_dirpath],
        stdout=err_file,
        stderr=err_file,
        indent='    ' + qutils.index_to_str(index))
    if return_code != 0:
        return
    genes = []
    _, _, fnames = os.walk(tmp_dirpath).next()
    for fname in fnames:
        if fname.endswith('gtf'):
            genes.extend(parse_gtf_out(os.path.join(tmp_dirpath, fname)))
    return genes
Beispiel #15
0
def gm_es(tool_dirpath, fasta_fpath, err_fpath, index, tmp_dirpath,
          num_threads):
    tool_exec_fpath = os.path.join(tool_dirpath, 'gmes_petap.pl')
    libs_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'genemark-es', 'lib')
    err_file = open(err_fpath, 'w')
    tmp_dirpath += qutils.name_from_fpath(fasta_fpath)
    if not os.path.isdir(tmp_dirpath):
        os.mkdir(tmp_dirpath)
    return_code = qutils.call_subprocess([
        'perl', '-I', libs_dirpath, tool_exec_fpath, '--ES', '--cores',
        str(num_threads), '--sequence', fasta_fpath, '--out', tmp_dirpath
    ],
                                         stdout=err_file,
                                         stderr=err_file,
                                         indent='    ' +
                                         qutils.index_to_str(index))
    if return_code != 0:
        return
    genes = []
    _, _, fnames = os.walk(tmp_dirpath).next()
    for fname in fnames:
        if fname.endswith('gtf'):
            genes.extend(parse_gtf_out(os.path.join(tmp_dirpath, fname)))
    return genes
Beispiel #16
0
def do(ref_fpath,
       contigs_fpaths,
       reads_fpaths,
       meta_ref_fpaths,
       output_dir,
       interleaved=False,
       external_logger=None):
    if external_logger:
        global logger
        logger = external_logger
    logger.print_timestamp()
    logger.main_info('Running Structural Variants caller...')

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    if not all_required_binaries_exist(bowtie_dirpath, 'bowtie2-align-l'):
        # making
        logger.main_info('Compiling Bowtie2 (details are in ' +
                         os.path.join(bowtie_dirpath, 'make.log') +
                         ' and make.err)')
        return_code = qutils.call_subprocess(
            ['make', '-C', bowtie_dirpath],
            stdout=open(os.path.join(bowtie_dirpath, 'make.log'), 'w'),
            stderr=open(os.path.join(bowtie_dirpath, 'make.err'), 'w'),
            logger=logger)

        if return_code != 0 or not all_required_binaries_exist(
                bowtie_dirpath, 'bowtie2-align-l'):
            logger.error(
                'Failed to compile Bowtie2 (' + bowtie_dirpath + ')! '
                'Try to compile it manually. ' +
                ('You can restart QUAST with the --debug flag '
                 'to see the command line.' if not qconfig.debug else ''))
            logger.main_info('Failed searching structural variations')
            return None

    if not all_required_binaries_exist(samtools_dirpath, 'samtools'):
        # making
        logger.main_info('Compiling SAMtools (details are in ' +
                         os.path.join(samtools_dirpath, 'make.log') +
                         ' and make.err)')
        return_code = qutils.call_subprocess(
            ['make', '-C', samtools_dirpath],
            stdout=open(os.path.join(samtools_dirpath, 'make.log'), 'w'),
            stderr=open(os.path.join(samtools_dirpath, 'make.err'), 'w'),
            logger=logger)

        if return_code != 0 or not all_required_binaries_exist(
                samtools_dirpath, 'samtools'):
            logger.error(
                'Failed to compile SAMtools (' + samtools_dirpath + ')! '
                'Try to compile it manually. ' +
                ('You can restart QUAST with the --debug flag '
                 'to see the command line.' if not qconfig.debug else ''))
            logger.main_info('Failed searching structural variations')
            return None

    if not all_required_binaries_exist(manta_bin_dirpath, 'configManta.py'):
        # making
        if not os.path.exists(manta_build_dirpath):
            os.mkdir(manta_build_dirpath)
        if qconfig.platform_name == 'linux_64':
            logger.main_info('  Downloading binary distribution of Manta...')
            manta_downloaded_fpath = os.path.join(manta_build_dirpath,
                                                  'manta.tar.bz2')
            response = urllib2.urlopen(manta_download_path)
            content = response.read()
            if content:
                logger.main_info('  Manta successfully downloaded!')
                f = open(manta_downloaded_fpath + '.download', 'w')
                f.write(content)
                f.close()
                if os.path.exists(manta_downloaded_fpath + '.download'):
                    logger.info('  Unpacking Manta...')
                    shutil.move(manta_downloaded_fpath + '.download',
                                manta_downloaded_fpath)
                    import tarfile
                    tar = tarfile.open(manta_downloaded_fpath, "r:bz2")
                    tar.extractall(manta_build_dirpath)
                    tar.close()
                    manta_temp_dirpath = os.path.join(manta_build_dirpath,
                                                      tar.members[0].name)
                    from distutils.dir_util import copy_tree
                    copy_tree(manta_temp_dirpath, manta_build_dirpath)
                    shutil.rmtree(manta_temp_dirpath)
                    os.remove(manta_downloaded_fpath)
                    logger.main_info('  Done')
            else:
                logger.main_info('  Failed downloading Manta from %s!' %
                                 manta_download_path)

        if not all_required_binaries_exist(manta_bin_dirpath,
                                           'configManta.py'):
            logger.main_info('Compiling Manta (details are in ' +
                             os.path.join(manta_dirpath, 'make.log') +
                             ' and make.err)')
            prev_dir = os.getcwd()
            os.chdir(manta_build_dirpath)
            return_code = qutils.call_subprocess(
                [
                    os.path.join(manta_dirpath, 'source', 'src', 'configure'),
                    '--prefix=' + os.path.join(manta_dirpath, 'build'),
                    '--jobs=' + str(qconfig.max_threads)
                ],
                stdout=open(os.path.join(manta_dirpath, 'make.log'), 'w'),
                stderr=open(os.path.join(manta_dirpath, 'make.err'), 'w'),
                logger=logger)
            if return_code == 0:
                return_code = qutils.call_subprocess(
                    ['make', '-j' + str(qconfig.max_threads)],
                    stdout=open(os.path.join(manta_dirpath, 'make.log'), 'a'),
                    stderr=open(os.path.join(manta_dirpath, 'make.err'), 'a'),
                    logger=logger)
                if return_code == 0:
                    return_code = qutils.call_subprocess(
                        ['make', 'install'],
                        stdout=open(os.path.join(manta_dirpath, 'make.log'),
                                    'a'),
                        stderr=open(os.path.join(manta_dirpath, 'make.err'),
                                    'a'),
                        logger=logger)
            os.chdir(prev_dir)
            if return_code != 0 or not all_required_binaries_exist(
                    manta_bin_dirpath, 'configManta.py'):
                logger.error(
                    'Failed to compile Manta (' + manta_dirpath + ')! '
                    'Try to compile it manually ' +
                    ('or download binary distribution from https://github.com/Illumina/manta/releases '
                     'and unpack it into ' +
                     os.path.join(manta_dirpath, 'build/') if qconfig.
                     platform_name == 'linux_64' else '') +
                    ('. You can restart QUAST with the --debug flag '
                     'to see the command line.' if not qconfig.debug else '.'))
                logger.main_info(
                    'Failed searching structural variations. QUAST will search trivial deletions only.'
                )

    temp_output_dir = os.path.join(output_dir, 'temp_output')

    if not os.path.isdir(temp_output_dir):
        os.mkdir(temp_output_dir)

    log_path = os.path.join(output_dir, 'sv_calling.log')
    err_path = os.path.join(output_dir, 'sv_calling.err')
    logger.info('  ' + 'Logging to files %s and %s...' % (log_path, err_path))
    try:
        bed_fpath = run_processing_reads(
            ref_fpath, meta_ref_fpaths,
            contigs_analyzer.ref_labels_by_chromosomes, reads_fpaths,
            temp_output_dir, output_dir, log_path, err_path)
    except:
        bed_fpath = None
        logger.error(
            'Failed searching structural variations! This function is experimental and may work improperly. Sorry for the inconvenience.'
        )
    if not qconfig.debug:
        shutil.rmtree(temp_output_dir, ignore_errors=True)

    logger.info('Done.')
    return bed_fpath
Beispiel #17
0
def run_processing_reads(main_ref_fpath, meta_ref_fpaths, ref_labels,
                         reads_fpaths, output_dirpath, res_path, log_path,
                         err_path):
    ref_name = qutils.name_from_fpath(main_ref_fpath)
    sam_fpath = os.path.join(output_dirpath, ref_name + '.sam')
    bam_fpath = os.path.join(output_dirpath, ref_name + '.bam')
    bam_sorted_fpath = os.path.join(output_dirpath, ref_name + '.sorted')
    sam_sorted_fpath = os.path.join(output_dirpath, ref_name + '.sorted.sam')
    bed_fpath = os.path.join(res_path, ref_name + '.bed')

    if is_non_empty_file(bed_fpath):
        logger.info('  Using existing BED-file: ' + bed_fpath)
        return bed_fpath

    logger.info('  ' + 'Pre-processing for searching structural variations...')
    logger.info('  ' + 'Logging to %s...' % err_path)
    if is_non_empty_file(sam_fpath):
        logger.info('  Using existing SAM-file: ' + sam_fpath)
    else:
        logger.info('  Running Bowtie2...')
        abs_reads_fpaths = [
        ]  # use absolute paths because we will change workdir
        for reads_fpath in reads_fpaths:
            abs_reads_fpaths.append(os.path.abspath(reads_fpath))

        prev_dir = os.getcwd()
        os.chdir(output_dirpath)
        cmd = [bin_fpath('bowtie2-build'), main_ref_fpath, ref_name]
        qutils.call_subprocess(cmd,
                               stdout=open(log_path, 'a'),
                               stderr=open(err_path, 'a'),
                               logger=logger)

        cmd = bin_fpath('bowtie2') + ' -x ' + ref_name + ' -1 ' + abs_reads_fpaths[0] + ' -2 ' + abs_reads_fpaths[1] + ' -S ' + \
              sam_fpath + ' --no-unal -a -p %s' % str(qconfig.max_threads)
        qutils.call_subprocess(shlex.split(cmd),
                               stdout=open(log_path, 'a'),
                               stderr=open(err_path, 'a'),
                               logger=logger)
        logger.info('  Done.')
        os.chdir(prev_dir)
        if not os.path.exists(sam_fpath) or os.path.getsize(sam_fpath) == 0:
            logger.error('  Failed running Bowtie2 for the reference. See ' +
                         log_path + ' for information.')
            logger.info('  Failed searching structural variations.')
            return None
    logger.info('  Sorting SAM-file...')
    if is_non_empty_file(sam_sorted_fpath):
        logger.info('  Using existing sorted SAM-file: ' + sam_sorted_fpath)
    else:
        qutils.call_subprocess([
            samtools_fpath('samtools'), 'view', '-@',
            str(qconfig.max_threads), '-bS', sam_fpath
        ],
                               stdout=open(bam_fpath, 'w'),
                               stderr=open(err_path, 'a'),
                               logger=logger)
        qutils.call_subprocess([
            samtools_fpath('samtools'), 'sort', '-@',
            str(qconfig.max_threads), bam_fpath, bam_sorted_fpath
        ],
                               stderr=open(err_path, 'a'),
                               logger=logger)
        qutils.call_subprocess([
            samtools_fpath('samtools'), 'view', '-@',
            str(qconfig.max_threads), bam_sorted_fpath + '.bam'
        ],
                               stdout=open(sam_sorted_fpath, 'w'),
                               stderr=open(err_path, 'a'),
                               logger=logger)
    if meta_ref_fpaths:
        logger.info('  Splitting SAM-file by references...')
    headers = []
    seq_name_length = {}
    with open(sam_fpath) as sam_file:
        for line in sam_file:
            if not line.startswith('@'):
                break
            if line.startswith('@SQ') and 'SN:' in line and 'LN:' in line:
                seq_name = line.split('\tSN:')[1].split('\t')[0]
                seq_length = int(line.split('\tLN:')[1].split('\t')[0])
                seq_name_length[seq_name] = seq_length
            headers.append(line.strip())
    need_ref_splitting = False
    if meta_ref_fpaths:
        ref_files = {}
        for cur_ref_fpath in meta_ref_fpaths:
            ref = qutils.name_from_fpath(cur_ref_fpath)
            new_ref_sam_fpath = os.path.join(output_dirpath, ref + '.sam')
            if is_non_empty_file(new_ref_sam_fpath):
                logger.info('    Using existing split SAM-file for %s: %s' %
                            (ref, new_ref_sam_fpath))
                ref_files[ref] = None
            else:
                new_ref_sam_file = open(new_ref_sam_fpath, 'w')
                new_ref_sam_file.write(headers[0] + '\n')
                chrs = []
                for h in (h for h in headers
                          if h.startswith('@SQ') and 'SN:' in h):
                    seq_name = h.split('\tSN:')[1].split('\t')[0]
                    if seq_name in ref_labels and ref_labels[seq_name] == ref:
                        new_ref_sam_file.write(h + '\n')
                        chrs.append(seq_name)
                new_ref_sam_file.write(headers[-1] + '\n')
                ref_files[ref] = new_ref_sam_file
                need_ref_splitting = True
    deletions = []
    trivial_deletions_fpath = os.path.join(output_dirpath,
                                           qconfig.trivial_deletions_fname)
    logger.info(
        '  Looking for trivial deletions (long zero-covered fragments)...')
    need_trivial_deletions = True
    if os.path.exists(trivial_deletions_fpath):
        need_trivial_deletions = False
        logger.info('    Using existing file: ' + trivial_deletions_fpath)

    if need_trivial_deletions or need_ref_splitting:
        with open(sam_sorted_fpath) as sam_file:
            cur_deletion = None
            for line in sam_file:
                mapping = Mapping.parse(line)
                if mapping:
                    # common case: continue current deletion (potential) on the same reference
                    if cur_deletion and cur_deletion.ref == mapping.ref:
                        if cur_deletion.next_bad is None:  # previous mapping was in region BEFORE 0-covered fragment
                            # just passed 0-covered fragment
                            if mapping.start - cur_deletion.prev_bad > QuastDeletion.MIN_GAP:
                                cur_deletion.set_next_bad(mapping)
                                if mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                    cur_deletion.set_next_good(mapping)
                                    if cur_deletion.is_valid():
                                        deletions.append(cur_deletion)
                                    cur_deletion = QuastDeletion(
                                        mapping.ref).set_prev_good(mapping)
                            # continue region BEFORE 0-covered fragment
                            elif mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                cur_deletion.set_prev_good(mapping)
                            else:
                                cur_deletion.set_prev_bad(mapping)
                        else:  # previous mapping was in region AFTER 0-covered fragment
                            # just passed another 0-cov fragment between end of cur_deletion BAD region and this mapping
                            if mapping.start - cur_deletion.next_bad_end > QuastDeletion.MIN_GAP:
                                if cur_deletion.is_valid(
                                ):  # add previous fragment's deletion if needed
                                    deletions.append(cur_deletion)
                                cur_deletion = QuastDeletion(
                                    mapping.ref).set_prev_bad(
                                        position=cur_deletion.next_bad_end)
                            # continue region AFTER 0-covered fragment (old one or new/another one -- see "if" above)
                            if mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                cur_deletion.set_next_good(mapping)
                                if cur_deletion.is_valid():
                                    deletions.append(cur_deletion)
                                cur_deletion = QuastDeletion(
                                    mapping.ref).set_prev_good(mapping)
                            else:
                                cur_deletion.set_next_bad_end(mapping)
                    # special case: just started or just switched to the next reference
                    else:
                        if cur_deletion and cur_deletion.ref in seq_name_length:  # switched to the next ref
                            cur_deletion.set_next_good(
                                position=seq_name_length[cur_deletion.ref])
                            if cur_deletion.is_valid():
                                deletions.append(cur_deletion)
                        cur_deletion = QuastDeletion(
                            mapping.ref).set_prev_good(mapping)

                    if need_ref_splitting:
                        cur_ref = ref_labels[mapping.ref]
                        if mapping.ref_next.strip(
                        ) == '=' or cur_ref == ref_labels[mapping.ref_next]:
                            if ref_files[cur_ref] is not None:
                                ref_files[cur_ref].write(line)
            if cur_deletion and cur_deletion.ref in seq_name_length:  # switched to the next ref
                cur_deletion.set_next_good(
                    position=seq_name_length[cur_deletion.ref])
                if cur_deletion.is_valid():
                    deletions.append(cur_deletion)
        if need_ref_splitting:
            for ref_handler in ref_files.values():
                if ref_handler is not None:
                    ref_handler.close()
        if need_trivial_deletions:
            logger.info('  Trivial deletions: %d found' % len(deletions))
            logger.info('    Saving to: ' + trivial_deletions_fpath)
            with open(trivial_deletions_fpath, 'w') as f:
                for deletion in deletions:
                    f.write(str(deletion) + '\n')

    if os.path.exists(config_manta_fpath):
        manta_sv_fpath = search_sv_with_manta(main_ref_fpath, meta_ref_fpaths,
                                              output_dirpath, err_path)
        qutils.cat_files([manta_sv_fpath, trivial_deletions_fpath], bed_fpath)
    elif os.path.exists(trivial_deletions_fpath):
        shutil.copy(trivial_deletions_fpath, bed_fpath)

    if os.path.exists(bed_fpath):
        logger.main_info('  Structural variations saved to ' + bed_fpath)
        return bed_fpath
    else:
        logger.main_info('  Failed searching structural variations.')
        return None
Beispiel #18
0
def run_processing_reads(main_ref_fpath, meta_ref_fpaths, ref_labels, reads_fpaths, output_dirpath, res_path, log_path, err_path):
    ref_name = qutils.name_from_fpath(main_ref_fpath)
    sam_fpath = os.path.join(output_dirpath, ref_name + '.sam')
    bam_fpath = os.path.join(output_dirpath, ref_name + '.bam')
    bam_sorted_fpath = os.path.join(output_dirpath, ref_name + '.sorted')
    sam_sorted_fpath = os.path.join(output_dirpath, ref_name + '.sorted.sam')
    bed_fpath = os.path.join(res_path, ref_name + '.bed')

    if is_non_empty_file(bed_fpath):
        logger.info('  Using existing BED-file: ' + bed_fpath)
        return bed_fpath

    logger.info('  ' + 'Pre-processing for searching structural variations...')
    logger.info('  ' + 'Logging to %s...' % err_path)
    if is_non_empty_file(sam_fpath):
        logger.info('  Using existing SAM-file: ' + sam_fpath)
    else:
        logger.info('  Running Bowtie2...')
        abs_reads_fpaths = []  # use absolute paths because we will change workdir
        for reads_fpath in reads_fpaths:
            abs_reads_fpaths.append(os.path.abspath(reads_fpath))

        prev_dir = os.getcwd()
        os.chdir(output_dirpath)
        cmd = [bin_fpath('bowtie2-build'), main_ref_fpath, ref_name]
        qutils.call_subprocess(cmd, stdout=open(log_path, 'a'), stderr=open(err_path, 'a'), logger=logger)

        cmd = bin_fpath('bowtie2') + ' -x ' + ref_name + ' -1 ' + abs_reads_fpaths[0] + ' -2 ' + abs_reads_fpaths[1] + ' -S ' + \
              sam_fpath + ' --no-unal -a -p %s' % str(qconfig.max_threads)
        qutils.call_subprocess(shlex.split(cmd), stdout=open(log_path, 'a'), stderr=open(err_path, 'a'), logger=logger)
        logger.info('  Done.')
        os.chdir(prev_dir)
        if not os.path.exists(sam_fpath) or os.path.getsize(sam_fpath) == 0:
            logger.error('  Failed running Bowtie2 for the reference. See ' + log_path + ' for information.')
            logger.info('  Failed searching structural variations.')
            return None
    logger.info('  Sorting SAM-file...')
    if is_non_empty_file(sam_sorted_fpath):
        logger.info('  Using existing sorted SAM-file: ' + sam_sorted_fpath)
    else:
        qutils.call_subprocess([samtools_fpath('samtools'), 'view', '-@', str(qconfig.max_threads), '-bS', sam_fpath], stdout=open(bam_fpath, 'w'),
                               stderr=open(err_path, 'a'), logger=logger)
        qutils.call_subprocess([samtools_fpath('samtools'), 'sort', '-@', str(qconfig.max_threads), bam_fpath, bam_sorted_fpath],
                               stderr=open(err_path, 'a'), logger=logger)
        qutils.call_subprocess([samtools_fpath('samtools'), 'view', '-@', str(qconfig.max_threads), bam_sorted_fpath + '.bam'], stdout=open(sam_sorted_fpath, 'w'),
                               stderr=open(err_path, 'a'), logger=logger)
    if meta_ref_fpaths:
        logger.info('  Splitting SAM-file by references...')
    headers = []
    seq_name_length = {}
    with open(sam_fpath) as sam_file:
        for line in sam_file:
            if not line.startswith('@'):
                break
            if line.startswith('@SQ') and 'SN:' in line and 'LN:' in line:
                seq_name = line.split('\tSN:')[1].split('\t')[0]
                seq_length = int(line.split('\tLN:')[1].split('\t')[0])
                seq_name_length[seq_name] = seq_length
            headers.append(line.strip())
    need_ref_splitting = False
    if meta_ref_fpaths:
        ref_files = {}
        for cur_ref_fpath in meta_ref_fpaths:
            ref = qutils.name_from_fpath(cur_ref_fpath)
            new_ref_sam_fpath = os.path.join(output_dirpath, ref + '.sam')
            if is_non_empty_file(new_ref_sam_fpath):
                logger.info('    Using existing split SAM-file for %s: %s' % (ref, new_ref_sam_fpath))
                ref_files[ref] = None
            else:
                new_ref_sam_file = open(new_ref_sam_fpath, 'w')
                new_ref_sam_file.write(headers[0] + '\n')
                chrs = []
                for h in (h for h in headers if h.startswith('@SQ') and 'SN:' in h):
                    seq_name = h.split('\tSN:')[1].split('\t')[0]
                    if seq_name in ref_labels and ref_labels[seq_name] == ref:
                        new_ref_sam_file.write(h + '\n')
                        chrs.append(seq_name)
                new_ref_sam_file.write(headers[-1] + '\n')
                ref_files[ref] = new_ref_sam_file
                need_ref_splitting = True
    deletions = []
    trivial_deletions_fpath = os.path.join(output_dirpath, qconfig.trivial_deletions_fname)
    logger.info('  Looking for trivial deletions (long zero-covered fragments)...')
    need_trivial_deletions = True
    if os.path.exists(trivial_deletions_fpath):
        need_trivial_deletions = False
        logger.info('    Using existing file: ' + trivial_deletions_fpath)

    if need_trivial_deletions or need_ref_splitting:
        with open(sam_sorted_fpath) as sam_file:
            cur_deletion = None
            for line in sam_file:
                mapping = Mapping.parse(line)
                if mapping:
                    # common case: continue current deletion (potential) on the same reference
                    if cur_deletion and cur_deletion.ref == mapping.ref:
                        if cur_deletion.next_bad is None:  # previous mapping was in region BEFORE 0-covered fragment
                            # just passed 0-covered fragment
                            if mapping.start - cur_deletion.prev_bad > QuastDeletion.MIN_GAP:
                                cur_deletion.set_next_bad(mapping)
                                if mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                    cur_deletion.set_next_good(mapping)
                                    if cur_deletion.is_valid():
                                        deletions.append(cur_deletion)
                                    cur_deletion = QuastDeletion(mapping.ref).set_prev_good(mapping)
                            # continue region BEFORE 0-covered fragment
                            elif mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                cur_deletion.set_prev_good(mapping)
                            else:
                                cur_deletion.set_prev_bad(mapping)
                        else:  # previous mapping was in region AFTER 0-covered fragment
                            # just passed another 0-cov fragment between end of cur_deletion BAD region and this mapping
                            if mapping.start - cur_deletion.next_bad_end > QuastDeletion.MIN_GAP:
                                if cur_deletion.is_valid():   # add previous fragment's deletion if needed
                                    deletions.append(cur_deletion)
                                cur_deletion = QuastDeletion(mapping.ref).set_prev_bad(position=cur_deletion.next_bad_end)
                            # continue region AFTER 0-covered fragment (old one or new/another one -- see "if" above)
                            if mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                cur_deletion.set_next_good(mapping)
                                if cur_deletion.is_valid():
                                    deletions.append(cur_deletion)
                                cur_deletion = QuastDeletion(mapping.ref).set_prev_good(mapping)
                            else:
                                cur_deletion.set_next_bad_end(mapping)
                    # special case: just started or just switched to the next reference
                    else:
                        if cur_deletion and cur_deletion.ref in seq_name_length:  # switched to the next ref
                            cur_deletion.set_next_good(position=seq_name_length[cur_deletion.ref])
                            if cur_deletion.is_valid():
                                deletions.append(cur_deletion)
                        cur_deletion = QuastDeletion(mapping.ref).set_prev_good(mapping)

                    if need_ref_splitting:
                        cur_ref = ref_labels[mapping.ref]
                        if mapping.ref_next.strip() == '=' or cur_ref == ref_labels[mapping.ref_next]:
                            if ref_files[cur_ref] is not None:
                                ref_files[cur_ref].write(line)
            if cur_deletion and cur_deletion.ref in seq_name_length:  # switched to the next ref
                cur_deletion.set_next_good(position=seq_name_length[cur_deletion.ref])
                if cur_deletion.is_valid():
                    deletions.append(cur_deletion)
        if need_ref_splitting:
            for ref_handler in ref_files.values():
                if ref_handler is not None:
                    ref_handler.close()
        if need_trivial_deletions:
            logger.info('  Trivial deletions: %d found' % len(deletions))
            logger.info('    Saving to: ' + trivial_deletions_fpath)
            with open(trivial_deletions_fpath, 'w') as f:
                for deletion in deletions:
                    f.write(str(deletion) + '\n')

    if os.path.exists(config_manta_fpath):
        manta_sv_fpath = search_sv_with_manta(main_ref_fpath, meta_ref_fpaths, output_dirpath, err_path)
        qutils.cat_files([manta_sv_fpath, trivial_deletions_fpath], bed_fpath)
    elif os.path.exists(trivial_deletions_fpath):
        shutil.copy(trivial_deletions_fpath, bed_fpath)

    if os.path.exists(bed_fpath):
        logger.main_info('  Structural variations saved to ' + bed_fpath)
        return bed_fpath
    else:
        logger.main_info('  Failed searching structural variations.')
        return None
Beispiel #19
0
def process_one_ref(cur_ref_fpath, output_dirpath, err_path, bed_fpath=None):
    ref = qutils.name_from_fpath(cur_ref_fpath)
    ref_sam_fpath = os.path.join(output_dirpath, ref + '.sam')
    ref_bam_fpath = os.path.join(output_dirpath, ref + '.bam')
    ref_bamsorted_fpath = os.path.join(output_dirpath, ref + '.sorted')
    ref_bed_fpath = bed_fpath if bed_fpath else os.path.join(
        output_dirpath, ref + '.bed')
    if os.path.getsize(
            ref_sam_fpath
    ) < 1024 * 1024:  # TODO: make it better (small files will cause Manta crush -- "not enough reads...")
        logger.info('  SAM file is too small for Manta (%d Kb), skipping..' %
                    (os.path.getsize(ref_sam_fpath) // 1024))
        return None
    if is_non_empty_file(ref_bed_fpath):
        logger.info('  Using existing Manta BED-file: ' + ref_bed_fpath)
        return ref_bed_fpath
    if not os.path.exists(ref_bamsorted_fpath + '.bam'):
        qutils.call_subprocess(
            [samtools_fpath('samtools'), 'view', '-bS', ref_sam_fpath],
            stdout=open(ref_bam_fpath, 'w'),
            stderr=open(err_path, 'a'),
            logger=logger)
        qutils.call_subprocess([
            samtools_fpath('samtools'), 'sort', ref_bam_fpath,
            ref_bamsorted_fpath
        ],
                               stderr=open(err_path, 'a'),
                               logger=logger)
    if not is_non_empty_file(ref_bamsorted_fpath + '.bam.bai'):
        qutils.call_subprocess([
            samtools_fpath('samtools'), 'index', ref_bamsorted_fpath + '.bam'
        ],
                               stderr=open(err_path, 'a'),
                               logger=logger)
    if not is_non_empty_file(cur_ref_fpath + '.fai'):
        qutils.call_subprocess(
            [samtools_fpath('samtools'), 'faidx', cur_ref_fpath],
            stderr=open(err_path, 'a'),
            logger=logger)
    vcfoutput_dirpath = os.path.join(output_dirpath, ref + '_manta')
    found_SV_fpath = os.path.join(vcfoutput_dirpath,
                                  'results/variants/diploidSV.vcf.gz')
    unpacked_SV_fpath = found_SV_fpath + '.unpacked'
    if not is_non_empty_file(found_SV_fpath):
        if os.path.exists(vcfoutput_dirpath):
            shutil.rmtree(vcfoutput_dirpath, ignore_errors=True)
        os.makedirs(vcfoutput_dirpath)
        qutils.call_subprocess([
            config_manta_fpath, '--normalBam', ref_bamsorted_fpath + '.bam',
            '--referenceFasta', cur_ref_fpath, '--runDir', vcfoutput_dirpath
        ],
                               stdout=open(err_path, 'a'),
                               stderr=open(err_path, 'a'),
                               logger=logger)
        if not os.path.exists(os.path.join(vcfoutput_dirpath,
                                           'runWorkflow.py')):
            return None
        qutils.call_subprocess([
            os.path.join(vcfoutput_dirpath, 'runWorkflow.py'), '-m', 'local',
            '-j',
            str(qconfig.max_threads)
        ],
                               stderr=open(err_path, 'a'),
                               logger=logger)
    if not is_non_empty_file(unpacked_SV_fpath):
        cmd = 'gunzip -c %s' % found_SV_fpath
        qutils.call_subprocess(shlex.split(cmd),
                               stdout=open(unpacked_SV_fpath, 'w'),
                               stderr=open(err_path, 'a'),
                               logger=logger)
    from manta import vcfToBedpe
    vcfToBedpe.vcfToBedpe(open(unpacked_SV_fpath), open(ref_bed_fpath, 'w'))
    return ref_bed_fpath
Beispiel #20
0
def create_meta_report(results_dirpath, json_texts):
    html_fpath = os.path.join(results_dirpath, report_fname)
    if not os.path.isfile(html_fpath):
        init(html_fpath, is_meta=True)

    from libs import search_references_meta
    taxons_for_krona = search_references_meta.taxons_for_krona
    meta_log = get_logger(qconfig.LOGGER_META_NAME)
    if taxons_for_krona:
        meta_log.info('  Drawing interactive Krona plots...')
        krona_dirpath = os.path.join(qconfig.LIBS_LOCATION, 'kronatools')
        krona_res_dirpath = os.path.join(results_dirpath,
                                         qconfig.krona_dirname)
        simplejson_error = False
        try:
            import json
        except ImportError:
            try:
                import simplejson as json
            except ImportError:
                log.warning(
                    'Can\'t draw Krona charts - please install python-simplejson'
                )
                simplejson_error = True
        if not simplejson_error:
            if not os.path.isdir(krona_res_dirpath):
                os.mkdir(krona_res_dirpath)
            json_data = json.loads(json_texts[0])
            assemblies = json_data['assembliesNames']
            krona_txt_ext = '_taxonomy.txt'
            krona_common_fpath = os.path.join(krona_res_dirpath,
                                              'overall' + krona_txt_ext)
            krona_common_file = open(krona_common_fpath, 'w')
            for index, name in enumerate(assemblies):
                krona_file = open(
                    os.path.join(krona_res_dirpath, name + krona_txt_ext), 'w')
                krona_file.close()
            for json_text in json_texts[1:]:
                json_data = json.loads(json_text)
                ref = json_data['referenceName']
                report = json_data['report'][0]
                for metric in report[1]:
                    if metric['metricName'] == 'Total length':
                        lengths = metric['values']
                        break
                cur_assemblies = json_data['assembliesNames']
                for index, name in enumerate(cur_assemblies):
                    krona_fpath = os.path.join(krona_res_dirpath,
                                               name + krona_txt_ext)
                    with open(krona_fpath, 'a') as f_krona:
                        if ref in taxons_for_krona:
                            f_krona.write(
                                str(lengths[index]) + '\t' +
                                taxons_for_krona[ref] + '\n')
                        else:
                            f_krona.write(str(lengths[index]) + '\n')
                if ref in taxons_for_krona:
                    krona_common_file.write(
                        str(sum(lengths)) + '\t' + taxons_for_krona[ref] +
                        '\n')
                else:
                    krona_common_file.write(str(sum(lengths)) + '\n')
            krona_common_file.close()
            krona_fpaths = []
            for index, name in enumerate(assemblies):
                krona_fpath = os.path.join(krona_res_dirpath,
                                           name + '_taxonomy_chart.html')
                krona_txt_fpath = os.path.join(krona_res_dirpath,
                                               name + krona_txt_ext)
                qutils.call_subprocess([
                    'perl', '-I', krona_dirpath + '/lib',
                    krona_dirpath + '/scripts/ImportText.pl', krona_txt_fpath,
                    '-o', krona_fpath, '-a'
                ],
                                       stdout=open(os.devnull, 'w'),
                                       stderr=open(os.devnull, 'w'))
                krona_fpaths.append(
                    os.path.join(qconfig.krona_dirname,
                                 name + '_taxonomy_chart.html'))
                meta_log.main_info('  Krona chart for ' + name +
                                   ' is saved to ' + krona_fpath)
                os.remove(krona_txt_fpath)
            if len(assemblies) > 1:
                name = 'summary'
                krona_fpath = os.path.join(krona_res_dirpath,
                                           name + '_taxonomy_chart.html')
                qutils.call_subprocess([
                    'perl', '-I', krona_dirpath + '/lib',
                    krona_dirpath + '/scripts/ImportText.pl',
                    krona_common_fpath, '-o', krona_fpath, '-a'
                ],
                                       stdout=open(os.devnull, 'w'),
                                       stderr=open(os.devnull, 'w'))
                meta_log.main_info('  Summary Krona chart is saved to ' +
                                   krona_fpath)
                krona_fpaths.append(
                    os.path.join(qconfig.krona_dirname, name +
                                 '_taxonomy_chart.html'))  # extra fpath!
            os.remove(krona_common_fpath)
            save_krona_paths(results_dirpath, krona_fpaths, assemblies)

    # reading html template file
    with open(html_fpath) as f_html:
        html_text = f_html.read()
    keyword = 'totalReport'
    html_text = re.sub('{{ ' + keyword + ' }}',
                       '[' + ','.join(json_texts) + ']', html_text)
    html_text = re.sub(r'{{(\s+\S+\s+)}}', '{}', html_text)
    with open(html_fpath, 'w') as f_html:
        f_html.write(html_text)
    meta_log.main_info(
        '  Extended version of HTML-report (for all references and assemblies) is saved to '
        + html_fpath)
Beispiel #21
0
def do(ref_fpath, contigs_fpaths, reads_fpaths, meta_ref_fpaths, output_dir, interleaved=False, external_logger=None):
    if external_logger:
        global logger
        logger = external_logger
    logger.print_timestamp()
    logger.main_info('Running Structural Variants caller...')

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    if not all_required_binaries_exist(bowtie_dirpath, 'bowtie2-align-l'):
        # making
        logger.main_info('Compiling Bowtie2 (details are in ' + os.path.join(bowtie_dirpath, 'make.log') + ' and make.err)')
        return_code = qutils.call_subprocess(
            ['make', '-C', bowtie_dirpath],
            stdout=open(os.path.join(bowtie_dirpath, 'make.log'), 'w'),
            stderr=open(os.path.join(bowtie_dirpath, 'make.err'), 'w'), logger=logger)

        if return_code != 0 or not all_required_binaries_exist(bowtie_dirpath, 'bowtie2-align-l'):
            logger.error('Failed to compile Bowtie2 (' + bowtie_dirpath + ')! '
                                                                   'Try to compile it manually. ' + (
                             'You can restart QUAST with the --debug flag '
                             'to see the command line.' if not qconfig.debug else ''))
            logger.main_info('Failed searching structural variations')
            return None

    if not all_required_binaries_exist(samtools_dirpath, 'samtools'):
        # making
        logger.main_info(
            'Compiling SAMtools (details are in ' + os.path.join(samtools_dirpath, 'make.log') + ' and make.err)')
        return_code = qutils.call_subprocess(
            ['make', '-C', samtools_dirpath],
            stdout=open(os.path.join(samtools_dirpath, 'make.log'), 'w'),
            stderr=open(os.path.join(samtools_dirpath, 'make.err'), 'w'), logger=logger)

        if return_code != 0 or not all_required_binaries_exist(samtools_dirpath, 'samtools'):
            logger.error('Failed to compile SAMtools (' + samtools_dirpath + ')! '
                                                                             'Try to compile it manually. ' + (
                             'You can restart QUAST with the --debug flag '
                             'to see the command line.' if not qconfig.debug else ''))
            logger.main_info('Failed searching structural variations')
            return None

    if not all_required_binaries_exist(manta_bin_dirpath, 'configManta.py'):
        # making
        if not os.path.exists(manta_build_dirpath):
            os.mkdir(manta_build_dirpath)
        if qconfig.platform_name == 'linux_64':
            logger.main_info('  Downloading binary distribution of Manta...')
            manta_downloaded_fpath = os.path.join(manta_build_dirpath, 'manta.tar.bz2')
            response = urllib2.urlopen(manta_download_path)
            content = response.read()
            if content:
                logger.main_info('  Manta successfully downloaded!')
                f = open(manta_downloaded_fpath + '.download', 'w' )
                f.write(content)
                f.close()
                if os.path.exists(manta_downloaded_fpath + '.download'):
                    logger.info('  Unpacking Manta...')
                    shutil.move(manta_downloaded_fpath + '.download', manta_downloaded_fpath)
                    import tarfile
                    tar = tarfile.open(manta_downloaded_fpath, "r:bz2")
                    tar.extractall(manta_build_dirpath)
                    tar.close()
                    manta_temp_dirpath = os.path.join(manta_build_dirpath, tar.members[0].name)
                    from distutils.dir_util import copy_tree
                    copy_tree(manta_temp_dirpath, manta_build_dirpath)
                    shutil.rmtree(manta_temp_dirpath)
                    os.remove(manta_downloaded_fpath)
                    logger.main_info('  Done')
            else:
                logger.main_info('  Failed downloading Manta from %s!' % manta_download_path)

        if not all_required_binaries_exist(manta_bin_dirpath, 'configManta.py'):
            logger.main_info('Compiling Manta (details are in ' + os.path.join(manta_dirpath, 'make.log') + ' and make.err)')
            prev_dir = os.getcwd()
            os.chdir(manta_build_dirpath)
            return_code = qutils.call_subprocess(
                [os.path.join(manta_dirpath, 'source', 'src', 'configure'), '--prefix=' + os.path.join(manta_dirpath, 'build'),
                 '--jobs=' + str(qconfig.max_threads)],
                stdout=open(os.path.join(manta_dirpath, 'make.log'), 'w'),
                stderr=open(os.path.join(manta_dirpath, 'make.err'), 'w'), logger=logger)
            if return_code == 0:
                return_code = qutils.call_subprocess(
                    ['make', '-j' + str(qconfig.max_threads)],
                    stdout=open(os.path.join(manta_dirpath, 'make.log'), 'a'),
                    stderr=open(os.path.join(manta_dirpath, 'make.err'), 'a'), logger=logger)
                if return_code == 0:
                    return_code = qutils.call_subprocess(
                    ['make', 'install'],
                    stdout=open(os.path.join(manta_dirpath, 'make.log'), 'a'),
                    stderr=open(os.path.join(manta_dirpath, 'make.err'), 'a'), logger=logger)
            os.chdir(prev_dir)
            if return_code != 0 or not all_required_binaries_exist(manta_bin_dirpath, 'configManta.py'):
                logger.error('Failed to compile Manta (' + manta_dirpath + ')! '
                                                                       'Try to compile it manually ' + (
                                 'or download binary distribution from https://github.com/Illumina/manta/releases '
                                 'and unpack it into ' + os.path.join(manta_dirpath, 'build/') if qconfig.platform_name == 'linux_64' else '') + (
                                 '. You can restart QUAST with the --debug flag '
                                 'to see the command line.' if not qconfig.debug else '.'))
                logger.main_info('Failed searching structural variations. QUAST will search trivial deletions only.')

    temp_output_dir = os.path.join(output_dir, 'temp_output')

    if not os.path.isdir(temp_output_dir):
        os.mkdir(temp_output_dir)

    log_path = os.path.join(output_dir, 'sv_calling.log')
    err_path = os.path.join(output_dir, 'sv_calling.err')
    logger.info('  ' + 'Logging to files %s and %s...' % (log_path, err_path))
    try:
        bed_fpath = run_processing_reads(ref_fpath, meta_ref_fpaths, contigs_analyzer.ref_labels_by_chromosomes, reads_fpaths, temp_output_dir, output_dir, log_path, err_path)
    except:
        bed_fpath = None
        logger.error('Failed searching structural variations! This function is experimental and may work improperly. Sorry for the inconvenience.')
    if not qconfig.debug:
        shutil.rmtree(temp_output_dir, ignore_errors=True)

    logger.info('Done.')
    return bed_fpath
Beispiel #22
0
def download_blastdb():
    if os.path.isfile(db_fpath + '.nsq'):
        logger.info()
        logger.info(
            'SILVA 16S rRNA database has already been downloaded, unpacked and BLAST database created. '
            'If not, please remove %s and rerun MetaQUAST' % db_fpath + '.nsq')
        return 0
    log_fpath = os.path.join(blastdb_dirpath, 'blastdb.log')
    db_gz_fpath = os.path.join(blastdb_dirpath, silva_fname + '.gz')
    silva_fpath = os.path.join(blastdb_dirpath, silva_fname)

    logger.info()
    if os.path.isfile(db_gz_fpath):
        logger.info(
            'SILVA 16S ribosomal RNA gene database has already been downloaded.'
        )
    else:
        logger.info('Downloading SILVA 16S ribosomal RNA gene database...')
        if not os.path.isdir(blastdb_dirpath):
            os.mkdir(blastdb_dirpath)
        silva_download = urllib.URLopener()
        silva_remote_fpath = silva_db_path + silva_fname + '.gz'
        try:
            silva_download.retrieve(silva_remote_fpath,
                                    db_gz_fpath + '.download', show_progress)
        except Exception:
            logger.error(
                'Failed downloading SILVA 16S rRNA gene database (%s)! The search for reference genomes cannot be performed. '
                'Try to download it manually in %s and restart MetaQUAST.' %
                (silva_remote_fpath, blastdb_dirpath))
            return 1
        shutil.move(db_gz_fpath + '.download', db_gz_fpath)

    logger.info('Processing downloaded file. Logging to %s...' % log_fpath)
    if not os.path.isfile(silva_fpath):
        logger.info('Unpacking and replacing " " with "_"...')

        unpacked_fpath = silva_fpath + ".unpacked"
        cmd = "gunzip -c %s" % db_gz_fpath
        qutils.call_subprocess(shlex.split(cmd),
                               stdout=open(unpacked_fpath, 'w'),
                               stderr=open(log_fpath, 'a'),
                               logger=logger)

        in_file = open(unpacked_fpath).read()
        out_file = open(unpacked_fpath, 'w')
        out_file.write(in_file.replace(' ', '_'))
        out_file.close()
        shutil.move(unpacked_fpath, silva_fpath)

    logger.info('Making BLAST database...')
    cmd = blast_fpath('makeblastdb') + (' -in %s -dbtype nucl -out %s' %
                                        (silva_fpath, db_fpath))
    qutils.call_subprocess(shlex.split(cmd),
                           stdout=open(log_fpath, 'a'),
                           stderr=open(log_fpath, 'a'),
                           logger=logger)
    if not os.path.exists(db_fpath +
                          '.nsq') or os.path.getsize(db_fpath +
                                                     '.nsq') < db_nsq_fsize:
        logger.error('Failed to make BLAST database ("' + blastdb_dirpath +
                     '"). See details in log. Try to make it manually: %s' %
                     cmd)
        return 1
    elif not qconfig.debug:
        os.remove(db_gz_fpath)
        os.remove(silva_fpath)
    return 0
Beispiel #23
0
def create_meta_report(results_dirpath, json_texts):
    html_fpath = os.path.join(results_dirpath, report_fname)
    if not os.path.isfile(html_fpath):
        init(html_fpath, is_meta=True)

    from libs import search_references_meta

    taxons_for_krona = search_references_meta.taxons_for_krona
    meta_log = get_logger(qconfig.LOGGER_META_NAME)
    if taxons_for_krona:
        meta_log.info("  Drawing interactive Krona plots...")
        krona_dirpath = os.path.join(qconfig.LIBS_LOCATION, "kronatools")
        krona_res_dirpath = os.path.join(results_dirpath, qconfig.krona_dirname)
        simplejson_error = False
        try:
            import json
        except ImportError:
            try:
                import simplejson as json
            except ImportError:
                log.warning("Can't draw Krona charts - please install python-simplejson")
                simplejson_error = True
        if not simplejson_error:
            if not os.path.isdir(krona_res_dirpath):
                os.mkdir(krona_res_dirpath)
            json_data = json.loads(json_texts[0])
            assemblies = json_data["assembliesNames"]
            krona_txt_ext = "_taxonomy.txt"
            krona_common_fpath = os.path.join(krona_res_dirpath, "overall" + krona_txt_ext)
            krona_common_file = open(krona_common_fpath, "w")
            for index, name in enumerate(assemblies):
                krona_file = open(os.path.join(krona_res_dirpath, name + krona_txt_ext), "w")
                krona_file.close()
            for json_text in json_texts[1:]:
                json_data = json.loads(json_text)
                ref = json_data["referenceName"]
                report = json_data["report"][0]
                for metric in report[1]:
                    if metric["metricName"] == "Total length":
                        lengths = metric["values"]
                        break
                cur_assemblies = json_data["assembliesNames"]
                for index, name in enumerate(cur_assemblies):
                    krona_fpath = os.path.join(krona_res_dirpath, name + krona_txt_ext)
                    with open(krona_fpath, "a") as f_krona:
                        if ref in taxons_for_krona:
                            f_krona.write(str(lengths[index]) + "\t" + taxons_for_krona[ref] + "\n")
                        else:
                            f_krona.write(str(lengths[index]) + "\n")
                if ref in taxons_for_krona:
                    krona_common_file.write(str(sum(lengths)) + "\t" + taxons_for_krona[ref] + "\n")
                else:
                    krona_common_file.write(str(sum(lengths)) + "\n")
            krona_common_file.close()
            krona_fpaths = []
            for index, name in enumerate(assemblies):
                krona_fpath = os.path.join(krona_res_dirpath, name + "_taxonomy_chart.html")
                krona_txt_fpath = os.path.join(krona_res_dirpath, name + krona_txt_ext)
                qutils.call_subprocess(
                    [
                        "perl",
                        "-I",
                        krona_dirpath + "/lib",
                        krona_dirpath + "/scripts/ImportText.pl",
                        krona_txt_fpath,
                        "-o",
                        krona_fpath,
                        "-a",
                    ],
                    stdout=open(os.devnull, "w"),
                    stderr=open(os.devnull, "w"),
                )
                krona_fpaths.append(os.path.join(qconfig.krona_dirname, name + "_taxonomy_chart.html"))
                meta_log.main_info("  Krona chart for " + name + " is saved to " + krona_fpath)
                os.remove(krona_txt_fpath)
            if len(assemblies) > 1:
                name = "summary"
                krona_fpath = os.path.join(krona_res_dirpath, name + "_taxonomy_chart.html")
                qutils.call_subprocess(
                    [
                        "perl",
                        "-I",
                        krona_dirpath + "/lib",
                        krona_dirpath + "/scripts/ImportText.pl",
                        krona_common_fpath,
                        "-o",
                        krona_fpath,
                        "-a",
                    ],
                    stdout=open(os.devnull, "w"),
                    stderr=open(os.devnull, "w"),
                )
                meta_log.main_info("  Summary Krona chart is saved to " + krona_fpath)
                krona_fpaths.append(os.path.join(qconfig.krona_dirname, name + "_taxonomy_chart.html"))  # extra fpath!
            os.remove(krona_common_fpath)
            save_krona_paths(results_dirpath, krona_fpaths, assemblies)

    # reading html template file
    with open(html_fpath) as f_html:
        html_text = f_html.read()
    keyword = "totalReport"
    html_text = re.sub("{{ " + keyword + " }}", "[" + ",".join(json_texts) + "]", html_text)
    html_text = re.sub(r"{{(\s+\S+\s+)}}", "{}", html_text)
    with open(html_fpath, "w") as f_html:
        f_html.write(html_text)
    meta_log.main_info(
        "  Extended version of HTML-report (for all references and assemblies) is saved to " + html_fpath
    )
Beispiel #24
0
def process_one_ref(cur_ref_fpath, output_dirpath, err_path, bed_fpath=None):
    ref = qutils.name_from_fpath(cur_ref_fpath)
    ref_sam_fpath = os.path.join(output_dirpath, ref + '.sam')
    ref_bam_fpath = os.path.join(output_dirpath, ref + '.bam')
    ref_bamsorted_fpath = os.path.join(output_dirpath, ref + '.sorted')
    ref_bed_fpath = bed_fpath if bed_fpath else os.path.join(output_dirpath, ref + '.bed')
    if os.path.getsize(ref_sam_fpath) < 1024 * 1024:  # TODO: make it better (small files will cause Manta crush -- "not enough reads...")
        logger.info('  SAM file is too small for Manta (%d Kb), skipping..' % (os.path.getsize(ref_sam_fpath) // 1024))
        return None
    if is_non_empty_file(ref_bed_fpath):
        logger.info('  Using existing Manta BED-file: ' + ref_bed_fpath)
        return ref_bed_fpath
    if not os.path.exists(ref_bamsorted_fpath + '.bam'):
        qutils.call_subprocess([samtools_fpath('samtools'), 'view', '-bS', ref_sam_fpath], stdout=open(ref_bam_fpath, 'w'),
                               stderr=open(err_path, 'a'), logger=logger)
        qutils.call_subprocess([samtools_fpath('samtools'), 'sort', ref_bam_fpath, ref_bamsorted_fpath],
                               stderr=open(err_path, 'a'), logger=logger)
    if not is_non_empty_file(ref_bamsorted_fpath + '.bam.bai'):
        qutils.call_subprocess([samtools_fpath('samtools'), 'index', ref_bamsorted_fpath + '.bam'],
                               stderr=open(err_path, 'a'), logger=logger)
    if not is_non_empty_file(cur_ref_fpath + '.fai'):
        qutils.call_subprocess([samtools_fpath('samtools'), 'faidx', cur_ref_fpath],
                               stderr=open(err_path, 'a'), logger=logger)
    vcfoutput_dirpath = os.path.join(output_dirpath, ref + '_manta')
    found_SV_fpath = os.path.join(vcfoutput_dirpath, 'results/variants/diploidSV.vcf.gz')
    unpacked_SV_fpath = found_SV_fpath + '.unpacked'
    if not is_non_empty_file(found_SV_fpath):
        if os.path.exists(vcfoutput_dirpath):
            shutil.rmtree(vcfoutput_dirpath, ignore_errors=True)
        os.makedirs(vcfoutput_dirpath)
        qutils.call_subprocess([config_manta_fpath, '--normalBam', ref_bamsorted_fpath + '.bam',
                                '--referenceFasta', cur_ref_fpath, '--runDir', vcfoutput_dirpath],
                               stdout=open(err_path, 'a'), stderr=open(err_path, 'a'), logger=logger)
        if not os.path.exists(os.path.join(vcfoutput_dirpath, 'runWorkflow.py')):
            return None
        qutils.call_subprocess([os.path.join(vcfoutput_dirpath, 'runWorkflow.py'), '-m', 'local', '-j', str(qconfig.max_threads)],
                               stderr=open(err_path, 'a'), logger=logger)
    if not is_non_empty_file(unpacked_SV_fpath):
        cmd = 'gunzip -c %s' % found_SV_fpath
        qutils.call_subprocess(shlex.split(cmd), stdout=open(unpacked_SV_fpath, 'w'),
                               stderr=open(err_path, 'a'), logger=logger)
    from manta import vcfToBedpe
    vcfToBedpe.vcfToBedpe(open(unpacked_SV_fpath), open(ref_bed_fpath, 'w'))
    return ref_bed_fpath