Example #1
0
def dep_check_blast(dir_dep, os_id, dist_id, debian_dists, redhat_dists,
                    force):
    """Locate the BLAST+ executables, downloading them if necessary.

    The lookup is attempted in this order:

    1. Executables found with ``which`` (skipped when ``force`` is True).
    2. An ``ncbi-blast`` directory already present in ``dir_dep``.
    3. A fresh download of BLAST+ 2.10.1, extracted into ``dir_dep``.

    Args:
        dir_dep: Directory in which dependencies are stored.
        os_id: 'mac' or 'linux'; selects the archive to download.
        dist_id: Linux distribution identifier, tested for membership in
            ``debian_dists`` and ``redhat_dists``.
        debian_dists: Container of supported Debian-like identifiers.
        redhat_dists: Container of supported Red Hat-like identifiers.
        force: When True, executables on the PATH are ignored.

    Returns:
        Tuple ``(makeblastdb, blastn, tblastn)`` of executable paths, or
        ``(None, None, None)`` if BLAST+ could not be obtained.
    """
    # Stays None on platforms for which no archive is known, so that the
    # download step can fail cleanly instead of hitting an unbound name.
    url = None
    if os_id == 'mac':
        url = ('https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.10.1/'
               'ncbi-blast-2.10.1+-x64-macosx.tar.gz')
    elif os_id == 'linux':
        # Both supported distribution families use the same archive.
        if dist_id in debian_dists or dist_id in redhat_dists:
            url = ('https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/'
                   '2.10.1/ncbi-blast-2.10.1+-x64-linux.tar.gz')

    dnld_path = opj(dir_dep, 'ncbi-blast.tar.gz')

    makeblastdb = None
    blastn = None
    tblastn = None

    try:
        if force is True:
            # Jump straight to the fallback handler below.
            raise RuntimeError('System-wide BLAST+ lookup skipped.')
        makeblastdb = which('makeblastdb')
        blastn = which('blastn')
        tblastn = which('tblastn')
        run([makeblastdb, '-help'])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'ncbi-blast'))
            makeblastdb = opj(dir_bin, 'bin', 'makeblastdb')
            blastn = opj(dir_bin, 'bin', 'blastn')
            tblastn = opj(dir_bin, 'bin', 'tblastn')
            run([makeblastdb, '-help'])
        except Exception:
            Log.wrn('BLAST+ was not found on this system, trying to download.')

            if url is None:
                Log.err('Could not download BLAST+.')
                return None, None, None

            download_file(url, dnld_path)
            # The context manager closes the archive even if extraction fails.
            with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                tar_ref.extractall(dir_dep)

            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'ncbi-blast'))
            makeblastdb = opj(dir_bin, 'bin', 'makeblastdb')
            blastn = opj(dir_bin, 'bin', 'blastn')
            tblastn = opj(dir_bin, 'bin', 'tblastn')

            if not ope(makeblastdb) or \
                    not ope(blastn) or \
                    not ope(tblastn):
                Log.err('Could not download BLAST+.')
                return None, None, None

    regexp = r'\sblast\s([\d\.]*)'
    v = get_dep_version([makeblastdb, '-version'], regexp)
    Log.msg('makeblastdb is available:', v + ' ' + makeblastdb)
    v = get_dep_version([blastn, '-version'], regexp)
    Log.msg('blastn is available:', v + ' ' + blastn)
    v = get_dep_version([tblastn, '-version'], regexp)
    Log.msg('tblastn is available:', v + ' ' + tblastn)

    return makeblastdb, blastn, tblastn
Example #2
0
def run_blast(exec_file, task, threads, db_path, queries_file, out_file,
              evalue, max_hsps, qcov_hsp_perc, best_hit_overhang,
              best_hit_score_edge, max_target_seqs, db_genetic_code,
              out_cols=BLST_RES_COLS_1):
    """Wrap blastn and tblastn.

    Builds the BLAST command line and executes it with ``run``. Results are
    written to ``out_file`` in tabular format ('-outfmt 6') using a tab
    delimiter and the columns listed in ``out_cols``.

    ``-db_gencode`` is appended only when the base name of ``exec_file`` is
    'tblastn'; for any other executable ``db_genetic_code`` is ignored.
    """
    exec_name = os.path.basename(exec_file)

    cmd = [exec_file,
           '-task', task,
           '-num_threads', str(threads),
           '-db', db_path,
           '-query', queries_file,
           '-out', out_file,
           '-outfmt', '6 delim=\t ' + ' '.join(out_cols),
           '-evalue', str(evalue),
           '-max_hsps', str(max_hsps),
           '-qcov_hsp_perc', str(qcov_hsp_perc),
           '-best_hit_overhang', str(best_hit_overhang),
           '-best_hit_score_edge', str(best_hit_score_edge),
           '-max_target_seqs', str(max_target_seqs),
           ]

    if exec_name == 'tblastn':
        # Converted with str() like every other option value, so that an
        # integer genetic code does not end up as a non-string argument.
        cmd += ['-db_gencode', str(db_genetic_code)]

    run(cmd, do_not_raise=True)
Example #3
0
def run_spades_pe(spades, out_dir, input_files, threads, memory, rna):
    """Run SPAdes (assembler only) on paired and unpaired reads.

    ``input_files`` is indexed as: 0 - paired_1.fastq, 1 - paired_2.fastq,
    2 - unpaired_1.fastq, 3 - unpaired_2.fastq. Anything after the first '.'
    in ``str(memory)`` is dropped before it is passed to '--memory'. The
    '--rna' flag is added when ``rna`` is truthy. The script is launched
    through the ``PY3`` interpreter.
    """
    mem_whole = str(memory).split('.')[0]

    args = [spades, '-o', out_dir]
    args += ['--pe1-1', input_files[0], '--pe1-2', input_files[1]]
    args += ['--s1', input_files[2], '--s2', input_files[3]]
    args += ['--only-assembler',
             '--threads', str(threads),
             '--memory', mem_whole,
             '--phred-offset', '33']

    if rna:
        args += ['--rna']

    run([PY3] + args, do_not_raise=True)
Example #4
0
def run_bowtie2_pe(bowtie2, input_files, paired_out_pattern,
                   paired_out_pattern_un,
                   unpaired_out_1, unpaired_out_2,
                   unpaired_out_1_un, unpaired_out_2_un, sam_output_file,
                   index, threads, dir_temp):
    """Run Bowtie 2 on paired reads plus two unpaired read files.

    ``input_files`` is indexed as: 0 and 1 - the paired files ('-1', '-2'),
    2 and 3 - the unpaired files (joined with ',' for '-U').

    Bowtie 2 writes the aligned ('--al') and unaligned ('--un') unpaired reads
    into temporary files in ``dir_temp``; each of these is then split with
    ``split_mixed_fq`` into the corresponding pair of output files and
    removed.
    """
    tmp_aligned = opj(dir_temp, 'temp_unpaired.fastq')
    tmp_unaligned = opj(dir_temp, 'temp_unpaired_un.fastq')

    general_opts = ['--threads', str(threads), '--very-sensitive',
                    '--phred33', '--no-unal', '--no-mixed',
                    '--no-discordant', '-a',
                    '--rdg', '1000,1000',
                    '--rfg', '1000,1000']

    io_opts = ['-x', index,
               '-1', input_files[0],
               '-2', input_files[1],
               '-U', input_files[2] + ',' + input_files[3],
               '--al-conc', paired_out_pattern,
               '--al', tmp_aligned,
               '--un-conc', paired_out_pattern_un,
               '--un', tmp_unaligned,
               '-S', sam_output_file]

    run([bowtie2] + general_opts + io_opts, cwd=dir_temp, do_not_raise=True)

    split_jobs = (
        (tmp_aligned, unpaired_out_1, unpaired_out_2),
        (tmp_unaligned, unpaired_out_1_un, unpaired_out_2_un),
    )
    for mixed_fq, out_fq_1, out_fq_2 in split_jobs:
        split_mixed_fq(mixed_fq, out_fq_1, out_fq_2)
        remove(mixed_fq)
Example #5
0
def run_rcorrector_se(rcorrector, in_file, out_dir, threads, dir_temp):
    """Run Rcorrector on a single reads file ('-s') with '-k 23'.

    Output goes to ``out_dir`` ('-od'); the command is executed with
    ``dir_temp`` as the working directory.
    """
    options = (
        ('-t', str(threads)),
        ('-s', in_file),
        ('-k', '23'),
        ('-od', out_dir),
    )

    cmd = [rcorrector]
    for flag, value in options:
        cmd += [flag, value]

    run(cmd, cwd=dir_temp, do_not_raise=True)
Example #6
0
def make_blast_db(exec_file, in_file, out_file, title, dbtype='nucl'):
    """Wrap makeblastdb.

    Passes ``in_file``, ``out_file``, ``title`` and ``dbtype`` to the
    '-in', '-out', '-title' and '-dbtype' options, respectively.
    """
    options = (
        ('-in', in_file),
        ('-out', out_file),
        ('-title', title),
        ('-dbtype', dbtype),
    )

    cmd = [exec_file]
    for flag, value in options:
        cmd += [flag, value]

    run(cmd, do_not_raise=True)
Example #7
0
def run_cluster_fast(vsearch, ident, in_file, out_file):
    """Run 'vsearch --cluster_fast' on ``in_file``.

    Centroids are written to ``out_file`` with '--fasta_width 0';
    ``ident`` is passed to '--id'.
    """
    options = (
        ('--cluster_fast', in_file),
        ('--centroids', out_file),
        ('--fasta_width', '0'),
        ('--id', str(ident)),
    )

    cmd = [vsearch]
    for flag, value in options:
        cmd += [flag, value]

    run(cmd, do_not_raise=True)
Example #8
0
def trimmomatic_se(trimmomatic, adapters, in_file, out_file, stats_file,
                   threads, minlen):
    """Run Trimmomatic in single-end ('SE') mode through 'java -jar'.

    ``trimmomatic`` is the path to the jar file, ``adapters`` is used in the
    ILLUMINACLIP step, ``stats_file`` receives the '-summary' output and
    ``minlen`` sets the MINLEN step.
    """
    launcher = ['java', '-jar', trimmomatic, 'SE']
    io_opts = ['-threads', str(threads),
               '-phred33',
               '-summary', stats_file,
               in_file, out_file]
    trim_steps = ['ILLUMINACLIP:' + adapters + ':2:30:10',
                  'SLIDINGWINDOW:4:20',
                  'LEADING:20',
                  'TRAILING:20',
                  'MINLEN:' + str(minlen)]

    run(launcher + io_opts + trim_steps, do_not_raise=True)
Example #9
0
def dep_check_sra_toolkit(dir_dep, os_id, dist_id, debian_dists, redhat_dists,
                          force):
    """Locate 'fasterq-dump' from SRA Toolkit, downloading it if necessary.

    The lookup is attempted in this order:

    1. 'fasterq-dump' found with ``which`` (skipped when ``force`` is True).
    2. A ``sratoolkit`` directory already present in ``dir_dep``.
    3. A fresh download of SRA Toolkit 2.10.8, extracted into ``dir_dep``.

    ``_ensure_vdb_cfg`` is called for every candidate installation before
    the executable is tried.

    Args:
        dir_dep: Directory in which dependencies are stored.
        os_id: 'mac' or 'linux'; selects the archive to download.
        dist_id: Linux distribution identifier, tested for membership in
            ``debian_dists`` and ``redhat_dists``.
        debian_dists: Container of supported Debian-like identifiers.
        redhat_dists: Container of supported Red Hat-like identifiers.
        force: When True, an executable on the PATH is ignored.

    Returns:
        Path to 'fasterq-dump', or None if it could not be obtained.
    """
    # Stays None on platforms for which no archive is known, so that the
    # download step can fail cleanly instead of hitting an unbound name.
    url = None
    if os_id == 'mac':
        url = ('https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.10.8/'
               'sratoolkit.2.10.8-mac64.tar.gz')
    elif os_id == 'linux':
        if dist_id in debian_dists:
            url = ('https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.10.8/'
                   'sratoolkit.2.10.8-ubuntu64.tar.gz')
        elif dist_id in redhat_dists:
            url = ('https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.10.8/'
                   'sratoolkit.2.10.8-centos_linux64.tar.gz')

    dnld_path = opj(dir_dep, 'sra-toolkit.tar.gz')

    fasterq_dump = None
    try:
        if force is True:
            # Jump straight to the fallback handler below.
            raise RuntimeError('System-wide SRA Toolkit lookup skipped.')
        fasterq_dump = which('fasterq-dump')
        # _ensure_vdb_cfg() appends 'bin' itself, so it needs the parent of
        # the directory that contains the executable. str.strip('bin') must
        # not be used for this: it removes the characters 'b', 'i' and 'n'
        # from both ends of the string, not a trailing 'bin' component.
        dir_bin = dirname(dirname(fasterq_dump))
        _ensure_vdb_cfg(dir_bin)
        run(fasterq_dump)
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'sratoolkit'))
            _ensure_vdb_cfg(dir_bin)
            fasterq_dump = opj(dir_bin, 'bin', 'fasterq-dump')
            run(fasterq_dump)
        except Exception:
            Log.wrn('SRA Toolkit was not found on this system, trying to '
                    'download.')

            if url is None:
                Log.err('Could not download SRA Toolkit.')
                return None

            download_file(url, dnld_path)
            # The context manager closes the archive even if extraction fails.
            with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                tar_ref.extractall(dir_dep)

            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'sratoolkit'))
            fasterq_dump = opj(dir_bin, 'bin', 'fasterq-dump')

            _ensure_vdb_cfg(dir_bin)

            if not ope(fasterq_dump):
                Log.err('Could not download SRA Toolkit.')
                return None

    # Two patterns are tried against the '--version' output; '?' is the
    # value that triggers the second attempt.
    v = get_dep_version([fasterq_dump, '--version'], r':\s([\d\.]*)')
    if v == '?':
        v = get_dep_version([fasterq_dump, '--version'], r'version\s([\d\.]*)')
    Log.msg('fasterq-dump is available:', v + ' ' + fasterq_dump)

    return fasterq_dump
Example #10
0
def run_rcorrector_pe(rcorrector, in_file_1, in_file_2, out_dir, threads,
                      dir_temp):
    """Run Rcorrector on a pair of reads files ('-1' and '-2').

    Uses '-k 20', '-maxcorK 4' and '-wk 0.90'. Output goes to ``out_dir``
    ('-od'); the command is executed with ``dir_temp`` as the working
    directory.
    """
    options = (
        ('-t', str(threads)),
        ('-1', in_file_1),
        ('-2', in_file_2),
        ('-k', '20'),
        ('-maxcorK', '4'),
        ('-wk', '0.90'),
        ('-od', out_dir),
    )

    cmd = [rcorrector]
    for flag, value in options:
        cmd += [flag, value]

    run(cmd, cwd=dir_temp, do_not_raise=True)
Example #11
0
def _ensure_vdb_cfg(dir_bin):
    """
    Create the SRA Toolkit configuration files without user interaction.

    Runs '<dir_bin>/bin/vdb-config --interactive' and feeds it 'x' as input,
    so that the user does not have to do this manually.

    Works around this message from the toolkit:
        This sra toolkit installation has not been configured.
        Before continuing, please run: vdb-config --interactive
        For more information, see https://www.ncbi.nlm.nih.gov/sra/docs/sra-cloud/

    """
    cmd = [opj(dir_bin, 'bin', 'vdb-config'), '--interactive']
    run(cmd, in_txt='x', do_not_raise=True)
Example #12
0
def dep_check_kakapolib(force=False, quiet=False):
    """Load kakapolib, compiling it first if the library file is missing.

    When ``KAKAPOLIB`` does not exist, 'make install' is run in
    ``DIR_C_SRC``. Messages about compiling and availability are logged only
    when ``quiet`` is exactly False; a failure is always logged. ``force``
    is accepted but not used.

    Returns:
        A ``ctypes.CDLL`` handle for the library, or None if the library
        file does not exist after the compilation attempt.
    """
    lib_path = KAKAPOLIB
    verbose = quiet is False

    if not ope(lib_path):
        if verbose:
            Log.wrn('Compiling kakapolib.')
        run(['make', 'install'], cwd=DIR_C_SRC)

    if not ope(lib_path):
        Log.err('Compilation of kakapolib failed.')
        return None

    if verbose:
        Log.msg('kakapolib is available:', lib_path)

    return ctypes.CDLL(lib_path)
Example #13
0
def run_bowtie2_se(bowtie2, input_file,
                   output_file, output_file_un, sam_output_file,
                   index, threads, dir_temp):
    """Run Bowtie 2 on a single unpaired reads file.

    Aligned reads go to ``output_file`` ('--al'), unaligned reads to
    ``output_file_un`` ('--un') and the alignments to ``sam_output_file``
    ('-S'). The command is executed with ``dir_temp`` as the working
    directory.
    """
    general_opts = ['--threads', str(threads), '--very-sensitive',
                    '--phred33', '--no-unal', '--no-mixed',
                    '--no-discordant', '-a',
                    '--rdg', '1000,1000',
                    '--rfg', '1000,1000']

    io_opts = ['-x', index,
               '-U', input_file,
               '--al', output_file,
               '--un', output_file_un,
               '-S', sam_output_file]

    run([bowtie2] + general_opts + io_opts, cwd=dir_temp, do_not_raise=True)
Example #14
0
def dep_check_bowtie2(dir_dep, os_id, force):
    """Locate 'bowtie2' and 'bowtie2-build', downloading them if necessary.

    The lookup is attempted in this order:

    1. Executables found with ``which`` (skipped when ``force`` is True).
    2. A ``bowtie2`` directory already present in ``dir_dep``.
    3. A fresh download of Bowtie 2 2.4.1, extracted into ``dir_dep``; the
       extracted executables are made executable with ``chmod``.

    Args:
        dir_dep: Directory in which dependencies are stored.
        os_id: 'mac' or 'linux'; selects the archive to download.
        force: When True, executables on the PATH are ignored.

    Returns:
        Tuple ``(bowtie2, bowtie2_build)`` of executable paths, or
        ``(None, None)`` if Bowtie 2 could not be obtained.
    """
    # Stays None on platforms for which no archive is known, so that the
    # download step can fail cleanly instead of hitting an unbound name.
    url = None
    if os_id == 'mac':
        url = ('https://sourceforge.net/projects/bowtie-bio/files/bowtie2/'
               '2.4.1/bowtie2-2.4.1-macos-x86_64.zip/download')
    elif os_id == 'linux':
        url = ('https://sourceforge.net/projects/bowtie-bio/files/bowtie2/'
               '2.4.1/bowtie2-2.4.1-linux-x86_64.zip/download')

    dnld_path = opj(dir_dep, 'bowtie2.zip')

    try:
        if force is True:
            # Jump straight to the fallback handler below.
            raise RuntimeError('System-wide Bowtie 2 lookup skipped.')
        bowtie2 = which('bowtie2')
        bowtie2_build = which('bowtie2-build')
        run([bowtie2, '-h'])
        run([bowtie2_build, '-h'])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'bowtie2'))
            bowtie2 = opj(dir_bin, 'bowtie2')
            bowtie2_build = opj(dir_bin, 'bowtie2-build')
            run([bowtie2, '-h'])
            run([bowtie2_build, '-h'])
        except Exception:
            Log.wrn('Bowtie 2 was not found on this system, trying to '
                    'download.')

            if url is None:
                Log.err('Could not download Bowtie 2.')
                return None, None

            download_file(url, dnld_path)
            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(dnld_path, 'r') as zip_ref:
                zip_ref.extractall(dir_dep)

            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'bowtie2'))
            bowtie2 = opj(dir_bin, 'bowtie2')
            bowtie2_build = opj(dir_bin, 'bowtie2-build')

            # Checked before chmod: changing the mode of a missing file would
            # raise instead of reporting the failed download.
            if not ope(bowtie2):
                Log.err('Could not download Bowtie 2.')
                return None, None

            bowtie2_execs = ('', '-align-l', '-align-l-debug', '-align-s',
                             '-align-s-debug', '-build', '-build-l',
                             '-build-l-debug', '-build-s', '-build-s-debug',
                             '-inspect', '-inspect-l', '-inspect-l-debug',
                             '-inspect-s', '-inspect-s-debug')

            # rwx for the owner, r-x for group and others.
            exec_mode = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
                         | stat.S_IROTH | stat.S_IXOTH)

            for bt2exe in bowtie2_execs:
                exe_path = bowtie2 + bt2exe
                # Extracted files do not keep their executable bit; files
                # that are absent from the archive are skipped.
                if ope(exe_path):
                    chmod(exe_path, exec_mode)

    regexp = r'^.*?version\s([\d\.]*)'
    v = get_dep_version([bowtie2, '--version'], regexp)
    Log.msg('bowtie2 is available:', v + ' ' + bowtie2)
    v = get_dep_version([bowtie2_build, '--version'], regexp)
    Log.msg('bowtie2-build is available:', v + ' ' + bowtie2_build)

    return bowtie2, bowtie2_build
Example #15
0
def run_spades_se(spades, out_dir, input_file, threads, memory, rna):
    """Run SPAdes (assembler only) on a single reads file ('-s').

    Anything after the first '.' in ``str(memory)`` is dropped before it is
    passed to '--memory'. The '--rna' flag is added when ``rna`` is truthy.
    The script is launched through the ``PY3`` interpreter.
    """
    mem_whole = str(memory).split('.')[0]

    args = [spades,
            '-o', out_dir,
            '-s', input_file,
            '--only-assembler',
            '--threads', str(threads),
            '--memory', mem_whole,
            '--phred-offset', '33']

    if rna:
        args += ['--rna']

    run([PY3] + args, do_not_raise=True)
Example #16
0
def seqtk_sample_reads(seqtk, in_file, out_file, n, seed=11):
    """Subsample reads from ``in_file`` with 'seqtk sample' into ``out_file``.

    Args:
        seqtk: Path to the seqtk executable.
        in_file: Reads file to sample from.
        out_file: File that receives the sampled reads.
        n: A fraction or a number of sequences to sample; may be given as a
            number or as a string.
        seed: Value passed to the '-s' option.
    """
    # str(n): a numeric n must not be placed in the argument list as-is.
    cmd = [seqtk, 'sample', '-2', '-s', str(seed), in_file, str(n)]
    # out is stored in memory, could use a lot of RAM
    out = run(cmd, do_not_raise=True)
    with open(out_file, mode='w') as f:
        f.write(out.stdout)
Example #17
0
def run_kraken_se(kraken, db, in_file, out_class_file, out_unclass_file,
                  report_file, confidence, threads, dir_temp):
    """Run Kraken on a single reads file.

    Classified and unclassified reads are written to ``out_class_file`` and
    ``out_unclass_file``; the report goes to ``report_file``. Two optional
    flags may be placed right after the executable: the value returned by
    ``_use_memory_mapping(db)`` when it is not None, and '--gzip-compressed'
    when the third item returned by ``splitext_gz(in_file)`` is not None.
    """
    options = ['--db', db,
               '--threads', str(threads),
               '--confidence', str(confidence),
               '--output', '-',
               '--report', report_file,
               '--use-names',
               '--classified-out', out_class_file,
               '--unclassified-out', out_unclass_file,
               in_file]

    leading = []

    mm_flag = _use_memory_mapping(db)
    if mm_flag is not None:
        leading.append(mm_flag)

    _base, _inner_ext, gz_ext = splitext_gz(in_file)
    if gz_ext is not None:
        # Goes in front of the memory mapping flag, if there is one.
        leading.insert(0, '--gzip-compressed')

    run([kraken] + leading + options, cwd=dir_temp, do_not_raise=True)
Example #18
0
def run_vsearch(vsearch, ident, q_file, db_file, out_file, minlen):
    """Run 'vsearch --usearch_global' with ``q_file`` against ``db_file``.

    Only the 'target' field is written to ``out_file`` ('--userout').
    ``ident`` is passed to '--id' and ``minlen`` to '--minseqlength'; all
    other option values are fixed.
    """
    options = (
        ('--usearch_global', q_file),
        ('--db', db_file),
        ('--userout', out_file),
        ('--userfields', 'target'),
        ('--maxseqlength', '1000'),
        ('--minseqlength', str(minlen)),
        ('--threads', '0'),
        ('--strand', 'both'),
        ('--maxaccepts', '0'),
        ('--maxrejects', '0'),
        ('--iddef', '2'),
        ('--maxsubs', '3'),
        ('--maxgaps', '1'),
        ('--target_cov', '0.33'),
        ('--id', str(ident)),
    )

    cmd = [vsearch]
    for flag, value in options:
        cmd += [flag, value]

    run(cmd, do_not_raise=True)
Example #19
0
def dep_check_vsearch(dir_dep, os_id, dist_id, debian_dists, redhat_dists,
                      force):
    """Locate the 'vsearch' executable, downloading it if necessary.

    The lookup is attempted in this order:

    1. 'vsearch' found with ``which`` (skipped when ``force`` is True).
    2. A ``vsearch`` directory already present in ``dir_dep``.
    3. A fresh download of Vsearch 2.15.0, extracted into ``dir_dep``.

    Args:
        dir_dep: Directory in which dependencies are stored.
        os_id: 'mac' or 'linux'; selects the archive to download.
        dist_id: Linux distribution identifier, tested for membership in
            ``debian_dists`` and ``redhat_dists``.
        debian_dists: Container of supported Debian-like identifiers.
        redhat_dists: Container of supported Red Hat-like identifiers.
        force: When True, an executable on the PATH is ignored.

    Returns:
        Path to 'vsearch', or None if it could not be obtained.
    """
    # Stays None on platforms for which no archive is known, so that the
    # download step can fail cleanly instead of hitting an unbound name.
    url = None
    if os_id == 'mac':
        url = ('https://github.com/torognes/vsearch/releases/download/v2.15.0/'
               'vsearch-2.15.0-macos-x86_64.tar.gz')
    elif os_id == 'linux':
        # Both supported distribution families use the same archive.
        if dist_id in debian_dists or dist_id in redhat_dists:
            url = ('https://github.com/torognes/vsearch/releases/download/'
                   'v2.15.0/vsearch-2.15.0-linux-x86_64.tar.gz')

    dnld_path = opj(dir_dep, 'vsearch.tar.gz')

    try:
        if force is True:
            # Jump straight to the fallback handler below.
            raise RuntimeError('System-wide Vsearch lookup skipped.')
        vsearch = which('vsearch')
        run(vsearch)
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'vsearch'))
            vsearch = opj(dir_bin, 'bin', 'vsearch')
            run(vsearch)
        except Exception:
            Log.wrn(
                'Vsearch was not found on this system, trying to download.')

            if url is None:
                Log.err('Could not download Vsearch.')
                return None

            download_file(url, dnld_path)
            # The context manager closes the archive even if extraction fails.
            with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                tar_ref.extractall(dir_dep)

            try:
                dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'vsearch'))
                vsearch = opj(dir_bin, 'bin', 'vsearch')
                if not ope(vsearch):
                    Log.err('Could not download Vsearch.')
                    return None
                else:
                    run(vsearch)
            except Exception:
                Log.err('Vsearch was downloaded, but does not execute.')
                Log.msg('Try downloading and installing it manually from: '
                        'https://github.com/torognes/vsearch')
                return None

    v = get_dep_version([vsearch, '-version'], r'vsearch\sv([\d\.]*)')
    Log.msg('Vsearch is available:', v + ' ' + vsearch)

    return vsearch
Example #20
0
def dep_check_spades(dir_dep, os_id, force):
    """Locate 'spades.py', downloading SPAdes if necessary.

    The lookup is attempted in this order:

    1. 'spades.py' found with ``which`` (skipped when ``force`` is True).
    2. A ``SPAdes`` directory already present in ``dir_dep``.
    3. A fresh download of SPAdes 3.14.1, extracted into ``dir_dep``.

    Every candidate is tried by running it with the ``PY3`` interpreter.

    Args:
        dir_dep: Directory in which dependencies are stored.
        os_id: 'mac' or 'linux'; selects the archive to download.
        force: When True, a script on the PATH is ignored.

    Returns:
        Path to 'spades.py', or None if it could not be obtained.
    """
    # Stays None on platforms for which no archive is known.
    url = None
    if os_id == 'mac':
        url = ('http://cab.spbu.ru/files/release3.14.1/'
               'SPAdes-3.14.1-Darwin.tar.gz')
    elif os_id == 'linux':
        url = ('http://cab.spbu.ru/files/release3.14.1/'
               'SPAdes-3.14.1-Linux.tar.gz')

    dnld_path = opj(dir_dep, 'SPAdes.tar.gz')

    try:
        if force is True:
            # Jump straight to the fallback handler below.
            raise RuntimeError('System-wide SPAdes lookup skipped.')
        spades = which('spades.py')
        run([PY3, spades])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'SPAdes'))
            spades = opj(dir_bin, 'bin', 'spades.py')
            run([PY3, spades])
        except Exception:
            Log.wrn('SPAdes was not found on this system, trying to download.')

            if url is None:
                Log.err('Could not download SPAdes.')
                return None

            try:
                download_file(url, dnld_path)
                # The context manager closes the archive even if extraction
                # fails.
                with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                    tar_ref.extractall(dir_dep)
            except Exception:
                Log.err('Could not download SPAdes.')
                return None

            try:
                dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'SPAdes'))
                spades = opj(dir_bin, 'bin', 'spades.py')
                if ope(spades):
                    run([PY3, spades])
                else:
                    Log.err('Could not download SPAdes.')
                    return None
            except Exception:
                Log.err('SPAdes was downloaded, but does not execute.')
                return None

    v = get_dep_version([PY3, spades, '--version'], r'^.*SPAdes.*v([\d\.]*)')
    Log.msg('SPAdes is available:', v + ' ' + spades)

    return spades
Example #21
0
def dep_check_seqtk(dir_dep, force):
    """Locate the 'seqtk' executable, downloading and compiling if necessary.

    A small temporary FASTA file is created and every candidate executable
    is tried on it with 'seqtk seq -r'.

    The lookup is attempted in this order:

    1. 'seqtk' found with ``which`` (skipped when ``force`` is True).
    2. ``<dir_dep>/seqtk-master/seqtk``.
    3. A fresh download of the master branch, compiled with 'make'. If that
       fails, 'CC=gcc' is replaced with 'CC=cc' in the Makefile and the
       compilation is attempted once more.

    Args:
        dir_dep: Directory in which dependencies are stored.
        force: When True, an executable on the PATH is ignored.

    Returns:
        Path to 'seqtk', or None if it could not be obtained.
    """
    url = 'https://github.com/lh3/seqtk/archive/master.zip'
    dnld_path = opj(dir_dep, 'seqtk.zip')
    dir_bin = opj(dir_dep, 'seqtk-master')

    # The context manager removes the temporary file on every exit path,
    # including an exception raised by the download or the extraction.
    with NamedTemporaryFile() as fp:
        fp.write(str.encode('>seq' + lns + 'ATGC'))
        # Also pushes the buffered bytes to disk so that seqtk can read them.
        fp.seek(0)
        cmd = ['', 'seq', '-r', fp.name]

        # NOTE(review): the test runs pass do_not_raise=True; whether a
        # missing or broken executable still raises inside run() should be
        # confirmed, since the fallbacks below depend on it.
        try:
            if force is True:
                # Jump straight to the fallback handler below.
                raise RuntimeError('System-wide Seqtk lookup skipped.')
            seqtk = which('seqtk')
            cmd[0] = seqtk
            run(cmd, do_not_raise=True)
        except Exception:
            try:
                seqtk = opj(dir_bin, 'seqtk')
                cmd[0] = seqtk
                run(cmd, do_not_raise=True)
            except Exception:
                Log.wrn(
                    'Seqtk was not found on this system, trying to download.')
                download_file(url, dnld_path)
                # Closes the archive even if extraction fails.
                with zipfile.ZipFile(dnld_path, 'r') as zip_ref:
                    zip_ref.extractall(dir_dep)
                try:
                    Log.wrn('Compiling Seqtk.')
                    run('make', cwd=dir_bin)
                    run(cmd, do_not_raise=True)
                except Exception:
                    replace_line_in_file(opj(dir_bin, 'Makefile'), 'CC=gcc',
                                         'CC=cc')
                    try:
                        run('make', cwd=dir_bin)
                        run(cmd, do_not_raise=True)
                    except Exception:
                        Log.err(
                            'Something went wrong while trying to compile '
                            'Seqtk.')
                        Log.msg(
                            'Try downloading and installing it manually from: '
                            'https://github.com/lh3/seqtk')
                        return None

    v = get_dep_version([seqtk], r'Version\:\s([\d\w\.\-]*)')
    Log.msg('Seqtk is available:', v + ' ' + seqtk)

    return seqtk
Example #22
0
def seqtk_fq_to_fa(seqtk, in_file, out_file):
    """Run 'seqtk seq -A' on ``in_file`` and write its stdout to ``out_file``.

    The complete output is held in memory before it is written, which may
    require a lot of RAM.
    """
    result = run([seqtk, 'seq', '-A', in_file], do_not_raise=True)
    with open(out_file, mode='w') as handle:
        handle.write(result.stdout)
Example #23
0
def seqtk_extract_reads(seqtk, in_file, out_file, ids_file):
    """Run 'seqtk subseq' with ``ids_file`` and write stdout to ``out_file``.

    The complete output is held in memory before it is written, which may
    require a lot of RAM.
    """
    result = run([seqtk, 'subseq', in_file, ids_file], do_not_raise=True)
    with open(out_file, mode='w') as handle:
        handle.write(result.stdout)
Example #24
0
def dnld_sra_fastq_files(sras, sra_runs_info, dir_fq_data, fasterq_dump,
                         threads, dir_temp):
    """Download FASTQ files for SRA runs that are not available locally.

    For every accession in ``sras`` the expected FASTQ paths in
    ``dir_fq_data`` are determined from the library layout recorded in
    ``sra_runs_info``. Missing files are downloaded with 'fasterq-dump'
    (up to 51 attempts per accession), paired files are renamed to
    '_R1'/'_R2'/'_R3' and the reads in the downloaded files are renamed with
    ``rename_fq_seqs``.

    Args:
        sras: SRA run accessions.
        sra_runs_info: Mapping from accession to a mapping with the keys
            'LibraryLayout', 'KakapoLibraryLayout', 'KakapoSampleBaseName',
            'TaxID' and 'avgLength'.
        dir_fq_data: Directory in which the FASTQ files are stored.
        fasterq_dump: Path to 'fasterq-dump', or None if it is unavailable.
        threads: Thread count; twice this value is given to 'fasterq-dump'.
        dir_temp: Temporary directory; it is removed if a download fails
            for good.

    Returns:
        Tuple ``(se_fastq_files, pe_fastq_files, sra_runs_info)``. The two
        dictionaries are keyed by sample base name and hold the keys 'path',
        'src', 'avg_len' and 'tax_id'; 'path' is a single path for
        single-end samples and a list of paths for paired-end samples.

    The process is terminated with exit status 1 when 'fasterq-dump' is
    required but unavailable, or when a download keeps failing.
    """
    if len(sras) > 0:
        if fasterq_dump is None:
            Log.err('fasterq-dump from SRA Toolkit is not available. ' +
                    'Cannot continue. Exiting.')
            # This is a failure, so the exit status must not be 0.
            exit(1)

        print()
        Log.inf('Downloading SRA read data.')

    se_fastq_files = {}
    pe_fastq_files = {}

    for sra in sras:
        sra_run_info = sra_runs_info[sra]
        sra_lib_layout = sra_run_info['LibraryLayout'].lower()
        sra_lib_layout_k = sra_run_info['KakapoLibraryLayout'].lower()
        sample_base_name = sra_run_info['KakapoSampleBaseName']
        sra_taxid = int(sra_run_info['TaxID'])
        avg_len = int(sra_run_info['avgLength'])

        sra_dnld_needed = False

        # 'KakapoLibraryLayout' set to 'single' takes precedence over a
        # 'paired' value of 'LibraryLayout'.
        if sra_lib_layout == 'single' or sra_lib_layout_k == 'single':
            se_file = opj(dir_fq_data, sra + '.fastq')
            se_fastq_files[sample_base_name] = {'path': se_file}
            se_fastq_files[sample_base_name]['src'] = 'sra'
            se_fastq_files[sample_base_name]['avg_len'] = avg_len
            se_fastq_files[sample_base_name]['tax_id'] = sra_taxid
            if not ope(se_file):
                sra_dnld_needed = True

        elif sra_lib_layout == 'paired':
            # '_1'/'_2' are the names looked for after the download; the
            # files are stored under the '_R1'/'_R2' names.
            pe_file_1 = opj(dir_fq_data, sra + '_1.fastq')
            pe_file_2 = opj(dir_fq_data, sra + '_2.fastq')
            pe_file_1_renamed = opj(dir_fq_data, sra + '_R1.fastq')
            pe_file_2_renamed = opj(dir_fq_data, sra + '_R2.fastq')
            pe_fastq_files[sample_base_name] = {
                'path': [pe_file_1_renamed, pe_file_2_renamed]
            }
            pe_fastq_files[sample_base_name]['src'] = 'sra'
            pe_fastq_files[sample_base_name]['avg_len'] = avg_len // 2
            pe_fastq_files[sample_base_name]['tax_id'] = sra_taxid
            if sra_lib_layout_k == 'paired_unp':
                # A third file with unpaired reads is expected as well.
                pe_file_3 = opj(dir_fq_data, sra + '.fastq')
                pe_file_3_renamed = opj(dir_fq_data, sra + '_R3.fastq')
                pe_fastq_files[sample_base_name]['path'].append(
                    pe_file_3_renamed)
            if not ope(pe_file_1_renamed) or not ope(pe_file_2_renamed):
                sra_dnld_needed = True

        if not sra_dnld_needed:
            Log.msg('FASTQ reads are available locally:', sample_base_name)

        retry_count = 0
        while sra_dnld_needed:

            if retry_count > 50:
                Log.err('Download failed. Exiting.')
                rmtree(dir_temp)
                exit(1)

            elif retry_count > 0:
                Log.wrn('Download failed. Retrying.')
                sleep(2)

            retry_count += 1

            Log.msg('Downloading FASTQ reads for:', sample_base_name)

            cmd = [
                fasterq_dump, '--threads',
                str(threads * 2), '--split-3', '--bufsize', '819200',
                '--outdir', dir_fq_data, '--temp', dir_temp, sra
            ]

            run(cmd, do_not_raise=True)

            # Success is judged by the presence of the expected files; a
            # missing file sends the loop into the next attempt.
            if sra_lib_layout == 'single' or sra_lib_layout_k == 'single':
                if not ope(se_file):
                    continue

            elif sra_lib_layout == 'paired':

                if not ope(pe_file_1) or not ope(pe_file_2):
                    continue
                else:
                    move(pe_file_1, pe_file_1_renamed)
                    move(pe_file_2, pe_file_2_renamed)

                if sra_lib_layout_k == 'paired_unp':
                    if not ope(pe_file_3):
                        continue
                    else:
                        move(pe_file_3, pe_file_3_renamed)

            sra_dnld_needed = False

            # Only freshly downloaded files get their reads renamed.
            if sra_lib_layout == 'single' or sra_lib_layout_k == 'single':
                if ope(se_file):
                    Log.msg('Renaming FASTQ reads in:', se_file)
                    rename_fq_seqs(se_file, sra, '1:N:0')

            elif sra_lib_layout == 'paired':
                if ope(pe_file_1_renamed):
                    Log.msg('Renaming FASTQ reads in:', pe_file_1_renamed)
                    rename_fq_seqs(pe_file_1_renamed, sra, '1:N:0')
                if ope(pe_file_2_renamed):
                    Log.msg('Renaming FASTQ reads in:', pe_file_2_renamed)
                    rename_fq_seqs(pe_file_2_renamed, sra, '2:N:0')
                if sra_lib_layout_k == 'paired_unp':
                    if ope(pe_file_3_renamed):
                        Log.msg('Renaming FASTQ reads in:', pe_file_3_renamed)
                        rename_fq_seqs(pe_file_3_renamed, sra + '_unpaired',
                                       '1:N:0')

    return se_fastq_files, pe_fastq_files, sra_runs_info
Example #25
0
def dep_check_rcorrector(dir_dep, force):
    """Locate 'run_rcorrector.pl', downloading and compiling if necessary.

    Rcorrector needs jellyfish, so both are checked:

    1. jellyfish and 'run_rcorrector.pl' found with ``which``. This step is
       abandoned when jellyfish is not usable from the PATH or when
       ``force`` is True.
    2. An ``Rcorrector`` directory already present in ``dir_dep``, together
       with the jellyfish executable selected in step 1.
    3. A fresh download of the master branch into ``dir_dep``, compiled with
       'make'. A previously downloaded archive and directory are removed
       first.

    Returns the path to 'run_rcorrector.pl', or None if compilation failed.
    """
    url = 'https://github.com/karolisr/Rcorrector/archive/master.tar.gz'
    dnld_path = opj(dir_dep, 'rcorrector.tar.gz')

    try:
        try:
            jellyfish = which('jellyfish')
            run([jellyfish, '--help'])
        except Exception:
            # Point at the jellyfish bundled with a local Rcorrector and
            # re-raise, which skips the rest of the system-wide check.
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'Rcorrector'))
            jellyfish = opj(dir_bin, 'jellyfish', 'bin', 'jellyfish')
            raise
        if force is True:
            # No exception is active here, so this bare raise produces a
            # RuntimeError, which is caught by the handler below.
            raise
        rcorrector = which('run_rcorrector.pl')
        run([rcorrector, '-version'])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'Rcorrector'))
            try:
                rcorrector = opj(dir_bin, 'run_rcorrector.pl')
                run([rcorrector, '-version'])
            except Exception:
                Log.wrn('Rcorrector was not found on this system, trying to '
                        'download.')
                raise
            try:
                # jellyfish is either the one from the PATH or the bundled
                # one, depending on the outcome of the first check above.
                run([jellyfish, '--version'])
            except Exception:
                Log.wrn(
                    'jellyfish is required by Rcorrector, but was not found. '
                    'Trying to download and recompile Rcorrector and '
                    'jellyfish.')
                raise
        except Exception:
            # Start from a clean state: remove the old archive and the old
            # directory before downloading again.
            if ope(dnld_path):
                remove(dnld_path)
            # Guards against removing dir_dep itself; presumably get_dep_dir
            # returns '' when no matching directory exists - TODO confirm.
            # NOTE(review): dir_bin is unbound here if get_dep_dir raised
            # above and the first handler never assigned it - confirm that
            # get_dep_dir cannot raise.
            if dir_bin != opj(dir_dep, ''):
                rmtree(dir_bin)
            download_file(url, dnld_path)
            tar_ref = tarfile.open(dnld_path, 'r:gz')
            tar_ref.extractall(dir_dep)
            tar_ref.close()
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'Rcorrector'))
            try:
                Log.wrn('Compiling Rcorrector.')
                run('make', cwd=dir_bin)
                rcorrector = opj(dir_bin, 'run_rcorrector.pl')
                jellyfish = opj(dir_bin, 'jellyfish', 'bin', 'jellyfish')
                # rwx for the owner, r-x for group and others.
                chmod(
                    rcorrector, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
                    | stat.S_IROTH | stat.S_IXOTH)
                run([rcorrector, '-version'])
                # Fall back to a jellyfish on the PATH when the bundled one
                # was not produced by the build.
                if not ope(jellyfish):
                    jellyfish = which('jellyfish')
                run([jellyfish, '--version'])
            except Exception:
                Log.err('Something went wrong while trying to compile '
                        'Rcorrector.')
                Log.msg('Try downloading and installing it manually from: '
                        'https://github.com/karolisr/Rcorrector')
                return None

    v = get_dep_version([rcorrector, '-version'], r'^Rcorrector\sv([\d\.]*)')
    Log.msg('Rcorrector is available:', v + ' ' + rcorrector)

    return rcorrector
Example #26
0
def build_bt2_index(bowtie2_build, input_files, output_path, threads):
    """Run 'bowtie2-build' to create an index at ``output_path``.

    The paths in ``input_files`` are passed as one comma-separated argument.
    """
    references = ','.join(input_files)
    run([bowtie2_build, '--threads', str(threads), references, output_path],
        do_not_raise=True)
Example #27
0
def dep_check_kraken2(dir_dep, os_id, release_name, force):
    """Locate 'kraken2' and 'kraken2-build', compiling them if necessary.

    The lookup is attempted in this order:

    1. Executables found with ``which`` (skipped when ``force`` is True).
    2. A ``kraken2`` directory already present in ``dir_dep``.
    3. A fresh download of the master branch into ``dir_dep``, followed by
       up to three compilation attempts: with the unmodified compiler flags,
       with a libomp obtained through ``brew_get``, and finally without
       '-fopenmp'.

    An installation is accepted when its 'classify' binary, run without
    arguments, writes a message starting with 'classify: mandatory filename'
    to stderr and both scripts run with '--help'.

    Returns a ``(kraken2, kraken2_build)`` tuple of paths, or ``(None, None)``
    if the 'kraken2' file does not exist after the compilation attempts.
    """
    url = 'https://github.com/karolisr/kraken2/archive/master.tar.gz'

    dnld_path = opj(dir_dep, 'kraken2.tar.gz')

    try:
        if force is True:
            # No exception is active here, so this bare raise produces a
            # RuntimeError, which is caught by the handler below.
            raise
        kraken2 = which('kraken2')
        kraken2_build = which('kraken2-build')

        # 'classify' is expected next to the 'kraken2' script.
        dir_bin = dirname(kraken2)
        classify_bin = opj(dir_bin, 'classify')
        _ = run([classify_bin], do_not_raise=True)
        if not _.stderr.startswith('classify: mandatory filename'):
            raise

        run([kraken2, '--help'])
        run([kraken2_build, '--help'])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'kraken2'))
            kraken2 = opj(dir_bin, 'bin', 'kraken2')
            kraken2_build = opj(dir_bin, 'bin', 'kraken2-build')

            classify_bin = opj(dir_bin, 'bin', 'classify')
            _ = run([classify_bin], do_not_raise=True)
            if not _.stderr.startswith('classify: mandatory filename'):
                # Re-raises the exception being handled; caught below.
                raise

            run([kraken2, '--help'])
            run([kraken2_build, '--help'])
        except Exception:
            Log.wrn('Kraken2 was not found on this system, trying to '
                    'download.')

            if ope(dnld_path):
                remove(dnld_path)

            download_file(url, dnld_path)
            tar_ref = tarfile.open(dnld_path, 'r:gz')
            tar_ref.extractall(dir_dep)
            tar_ref.close()

            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'kraken2'))
            classify_bin = opj(dir_bin, 'bin', 'classify')
            kraken2 = opj(dir_bin, 'bin', 'kraken2')
            kraken2_build = opj(dir_bin, 'bin', 'kraken2-build')

            # Quote the destination directory in the Makefile's copy command.
            makefile = opj(dir_bin, 'src', 'Makefile')
            replace_line_in_file(makefile, 'cp $(PROGS) $(KRAKEN2_DIR)/',
                                 'cp $(PROGS) "$(KRAKEN2_DIR)"/')
            try:
                # Attempt 1: the Makefile's original compiler flags.
                Log.wrn('Compiling Kraken2 Attempt 1')
                run(['./install_kraken2.sh', 'bin'], cwd=dir_bin)

                _ = run([classify_bin], do_not_raise=True)
                if not _.stderr.startswith('classify: mandatory filename'):
                    raise

                run([kraken2, '--help'])
                run([kraken2_build, '--help'])

            except Exception:
                try:
                    # Attempt 2: compile against a libomp fetched with
                    # brew_get and extracted into dir_dep.
                    Log.wrn('Compiling Kraken2 Attempt 2')

                    dir_libomp = opj(dir_dep, 'libomp')

                    if ope(dir_libomp):
                        rmtree(dir_libomp)

                    libomp_fp, v = brew_get('libomp', os_id, release_name,
                                            dir_dep)

                    tar_ref = tarfile.open(libomp_fp, 'r:gz')
                    tar_ref.extractall(dir_dep)
                    tar_ref.close()

                    dir_libomp_l = opj(dir_libomp, v, 'lib')
                    dir_libomp_i = opj(dir_libomp, v, 'include')

                    if os_id == 'mac':
                        # Changes the shared library identification name of a
                        # dynamic shared library.
                        dylib_f = opj(dir_libomp_l, 'libomp.dylib')

                        chmod(
                            dylib_f, stat.S_IRWXU | stat.S_IRUSR | stat.S_IWUSR
                            | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH
                            | stat.S_IWOTH)

                        cmd = ['install_name_tool', '-id', dylib_f, dylib_f]
                        run(cmd)

                        cxx_flags = ('CXXFLAGS = -L{} -I{} -Xpreprocessor '
                                     '-fopenmp -lomp -Wall -std=c++11 -O3')

                    elif os_id == 'linux':
                        cxx_flags = ('CXXFLAGS = -L{} -I{} -fopenmp -lomp '
                                     '-static -Wall -std=c++11 -O3')

                    # NOTE(review): cxx_flags is unbound for any other os_id;
                    # the resulting error is caught by the handler below.
                    cxx_flags = cxx_flags.format(dir_libomp_l, dir_libomp_i)

                    makefile = opj(dir_bin, 'src', 'Makefile')

                    replace_line_in_file(
                        makefile, 'CXXFLAGS = -fopenmp -Wall -std=c++11'
                        ' -O3', cxx_flags)

                    run(['./install_kraken2.sh', 'bin'], cwd=dir_bin)

                    _ = run([classify_bin], do_not_raise=True)
                    if not _.stderr.startswith('classify: mandatory filename'):
                        raise

                    run([kraken2, '--help'])
                    run([kraken2_build, '--help'])

                except Exception:
                    try:
                        # Attempt 3: replace the flags written by attempt 2
                        # with flags that do not use OpenMP.
                        # NOTE(review): if attempt 2 failed before cxx_flags
                        # was assigned, the lookup below raises and the error
                        # is silently discarded, so nothing is compiled here.
                        Log.wrn('Compiling Kraken2 Attempt 3')
                        makefile = opj(dir_bin, 'src', 'Makefile')
                        replace_line_in_file(
                            makefile, cxx_flags,
                            'CXXFLAGS = -Wall -std=c++11 -O3')
                        run(['./install_kraken2.sh', 'bin'], cwd=dir_bin)

                        _ = run([classify_bin], do_not_raise=True)
                        if not _.stderr.startswith(
                                'classify: mandatory filename'):
                            raise

                        run([kraken2, '--help'])
                        run([kraken2_build, '--help'])
                    except Exception:
                        # Deliberately ignored; the existence check below
                        # decides whether the installation is reported as
                        # failed.
                        pass

            # NOTE(review): only the existence of the 'kraken2' file is
            # tested here, not the outcome of the checks in the attempts
            # above - confirm this is intended.
            if not ope(kraken2):
                Log.err('Something went wrong while trying to compile '
                        'Kraken2.')
                Log.msg('Try downloading and installing it manually from: '
                        'https://github.com/karolisr/kraken2')
                return None, None

    regexp = r'^.*?version\s([\d\.\-A-Za-z]*)'
    v = get_dep_version([kraken2, '--version'], regexp)
    Log.msg('kraken2 is available:', v + ' ' + kraken2)
    v = get_dep_version([kraken2_build, '--version'], regexp)
    Log.msg('kraken2-build is available:', v + ' ' + kraken2_build)

    return kraken2, kraken2_build