def dep_check_blast(dir_dep, os_id, dist_id, debian_dists, redhat_dists,
                    force):
    """Locate working BLAST+ executables, downloading them when needed.

    Lookup order:
      1. Executables found on ``PATH`` (skipped when ``force`` is ``True``).
      2. A copy previously unpacked under ``dir_dep``.
      3. A fresh download of BLAST+ 2.10.1 unpacked into ``dir_dep``.

    Args:
        dir_dep: Directory that holds downloaded dependencies.
        os_id: Operating system identifier; only 'mac' and 'linux' have a
            download URL.
        dist_id: Linux distribution identifier.
        debian_dists: Collection of Debian-like distribution identifiers.
        redhat_dists: Collection of Red Hat-like distribution identifiers.
        force: When ``True``, executables on ``PATH`` are ignored.

    Returns:
        tuple: ``(makeblastdb, blastn, tblastn)`` paths, or
        ``(None, None, None)`` when BLAST+ could not be obtained.
    """
    # Stays None when no download exists for this OS / distribution.
    url = None
    if os_id == 'mac':
        url = ('https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.10.1/'
               'ncbi-blast-2.10.1+-x64-macosx.tar.gz')
    elif os_id == 'linux':
        # The same archive is used for Debian-like and Red Hat-like systems.
        if dist_id in debian_dists or dist_id in redhat_dists:
            url = ('https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/'
                   '2.10.1/ncbi-blast-2.10.1+-x64-linux.tar.gz')

    dnld_path = opj(dir_dep, 'ncbi-blast.tar.gz')

    makeblastdb = None
    blastn = None
    tblastn = None

    try:
        if force is True:
            raise RuntimeError('Forced to skip BLAST+ found on PATH.')
        makeblastdb = which('makeblastdb')
        blastn = which('blastn')
        tblastn = which('tblastn')
        # All three programs are reported below, so all three must be found.
        if makeblastdb is None or blastn is None or tblastn is None:
            raise RuntimeError('BLAST+ is not completely available on PATH.')
        run([makeblastdb, '-help'])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'ncbi-blast'))
            makeblastdb = opj(dir_bin, 'bin', 'makeblastdb')
            blastn = opj(dir_bin, 'bin', 'blastn')
            tblastn = opj(dir_bin, 'bin', 'tblastn')
            run([makeblastdb, '-help'])
        except Exception:
            Log.wrn('BLAST+ was not found on this system, trying to download.')
            if url is None:
                Log.err('BLAST+ cannot be downloaded automatically for this '
                        'platform.')
                return None, None, None
            download_file(url, dnld_path)
            # The archive is closed even if extraction fails.
            with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                tar_ref.extractall(dir_dep)

            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'ncbi-blast'))
            makeblastdb = opj(dir_bin, 'bin', 'makeblastdb')
            blastn = opj(dir_bin, 'bin', 'blastn')
            tblastn = opj(dir_bin, 'bin', 'tblastn')

            if not ope(makeblastdb) or \
                    not ope(blastn) or \
                    not ope(tblastn):
                Log.err('Could not download BLAST+.')
                return None, None, None

    regexp = r'\sblast\s([\d\.]*)'
    v = get_dep_version([makeblastdb, '-version'], regexp)
    Log.msg('makeblastdb is available:', v + ' ' + makeblastdb)
    v = get_dep_version([blastn, '-version'], regexp)
    Log.msg('blastn is available:', v + ' ' + blastn)
    v = get_dep_version([tblastn, '-version'], regexp)
    Log.msg('tblastn is available:', v + ' ' + tblastn)

    return makeblastdb, blastn, tblastn
def run_blast(exec_file, task, threads, db_path, queries_file, out_file,
              evalue, max_hsps, qcov_hsp_perc, best_hit_overhang,
              best_hit_score_edge, max_target_seqs, db_genetic_code,
              out_cols=BLST_RES_COLS_1):
    """Wrap blastn and tblastn.

    Builds the command line for ``exec_file`` and executes it with ``run``.
    Results are requested in tab-delimited ``-outfmt 6`` with the columns
    listed in ``out_cols``. ``-db_gencode`` is added only when the executable
    is named 'tblastn'.

    Args:
        exec_file: Path to the blastn or tblastn executable.
        task: Value for ``-task``.
        threads: Value for ``-num_threads``.
        db_path: Value for ``-db``.
        queries_file: Value for ``-query``.
        out_file: Value for ``-out``.
        evalue: Value for ``-evalue``.
        max_hsps: Value for ``-max_hsps``.
        qcov_hsp_perc: Value for ``-qcov_hsp_perc``.
        best_hit_overhang: Value for ``-best_hit_overhang``.
        best_hit_score_edge: Value for ``-best_hit_score_edge``.
        max_target_seqs: Value for ``-max_target_seqs``.
        db_genetic_code: Value for ``-db_gencode`` (tblastn only); may be an
            int or a str.
        out_cols: Iterable of output column names.
    """
    exec_name = os.path.basename(exec_file)
    if exec_name == 'tblastn':
        # Converted to text like every other numeric option, so that an
        # integer genetic code does not break the command line.
        gencode_args = ['-db_gencode', str(db_genetic_code)]
    else:
        gencode_args = []

    cmd = [exec_file,
           '-task', task,
           '-num_threads', str(threads),
           '-db', db_path,
           '-query', queries_file,
           '-out', out_file,
           '-outfmt', '6 delim=\t ' + ' '.join(out_cols),
           '-evalue', str(evalue),
           '-max_hsps', str(max_hsps),
           '-qcov_hsp_perc', str(qcov_hsp_perc),
           '-best_hit_overhang', str(best_hit_overhang),
           '-best_hit_score_edge', str(best_hit_score_edge),
           '-max_target_seqs', str(max_target_seqs),
           ]

    cmd = cmd + gencode_args
    run(cmd, do_not_raise=True)
def run_spades_pe(spades, out_dir, input_files, threads, memory, rna):
    """Run SPAdes (``--only-assembler``) on paired and unpaired reads.

    ``input_files`` is read by index: 0 and 1 are the paired FASTQ files,
    2 and 3 are the unpaired FASTQ files. The SPAdes script is launched
    through the interpreter named by ``PY3``. ``--rna`` is appended when
    ``rna`` is truthy.
    """
    # Only the text before the first '.' of the memory value is used.
    mem_limit = str(memory).partition('.')[0]

    spades_args = [PY3, spades, '-o', out_dir]
    spades_args += ['--pe1-1', input_files[0]]  # paired_1.fastq
    spades_args += ['--pe1-2', input_files[1]]  # paired_2.fastq
    spades_args += ['--s1', input_files[2]]  # unpaired_1.fastq
    spades_args += ['--s2', input_files[3]]  # unpaired_2.fastq
    spades_args += ['--only-assembler',
                    '--threads', str(threads),
                    '--memory', mem_limit,
                    '--phred-offset', '33']

    if rna:
        spades_args += ['--rna']

    run(spades_args, do_not_raise=True)
def run_bowtie2_pe(bowtie2, input_files, paired_out_pattern,
                   paired_out_pattern_un, unpaired_out_1, unpaired_out_2,
                   unpaired_out_1_un, unpaired_out_2_un, sam_output_file,
                   index, threads, dir_temp):
    """Run bowtie2 on paired reads plus two unpaired read files.

    ``input_files`` is read by index: 0 and 1 are given to ``-1`` / ``-2``,
    2 and 3 are joined with a comma and given to ``-U``. The reads that
    bowtie2 writes via ``--al`` and ``--un`` go to temporary files in
    ``dir_temp``; each temporary file is then passed to ``split_mixed_fq``
    together with the matching pair of unpaired output paths and removed.
    """
    mixed_aligned = opj(dir_temp, 'temp_unpaired.fastq')
    mixed_unaligned = opj(dir_temp, 'temp_unpaired_un.fastq')

    alignment_opts = ['--threads', str(threads),
                      '--very-sensitive',
                      '--phred33',
                      '--no-unal',
                      '--no-mixed',
                      '--no-discordant',
                      '-a',
                      '--rdg', '1000,1000',
                      '--rfg', '1000,1000']

    input_opts = ['-x', index,
                  '-1', input_files[0],
                  '-2', input_files[1],
                  '-U', input_files[2] + ',' + input_files[3]]

    output_opts = ['--al-conc', paired_out_pattern,
                   '--al', mixed_aligned,
                   '--un-conc', paired_out_pattern_un,
                   '--un', mixed_unaligned,
                   '-S', sam_output_file]

    run([bowtie2] + alignment_opts + input_opts + output_opts,
        cwd=dir_temp, do_not_raise=True)

    # Aligned reads are handled first, then unaligned reads.
    split_jobs = (
        (mixed_aligned, unpaired_out_1, unpaired_out_2),
        (mixed_unaligned, unpaired_out_1_un, unpaired_out_2_un),
    )
    for mixed_fq, out_1, out_2 in split_jobs:
        split_mixed_fq(mixed_fq, out_1, out_2)
        remove(mixed_fq)
def run_rcorrector_se(rcorrector, in_file, out_dir, threads, dir_temp):
    """Run Rcorrector on a single-end read file.

    The command is executed with ``dir_temp`` as the working directory and
    uses a fixed ``-k 23``.
    """
    options = (
        ('-t', str(threads)),
        ('-s', in_file),
        ('-k', '23'),
        ('-od', out_dir),
    )

    cmd = [rcorrector]
    for flag, value in options:
        cmd.extend((flag, value))

    run(cmd, cwd=dir_temp, do_not_raise=True)
def make_blast_db(exec_file, in_file, out_file, title, dbtype='nucl'):
    """Wrap makeblastdb.

    Passes ``in_file``, ``out_file``, ``title`` and ``dbtype`` to the
    ``-in``, ``-out``, ``-title`` and ``-dbtype`` options of ``exec_file``.
    """
    makeblastdb_args = [exec_file]
    makeblastdb_args += ['-in', in_file]
    makeblastdb_args += ['-out', out_file]
    makeblastdb_args += ['-title', title]
    makeblastdb_args += ['-dbtype', dbtype]

    run(makeblastdb_args, do_not_raise=True)
def run_cluster_fast(vsearch, ident, in_file, out_file):
    """Run ``vsearch --cluster_fast`` and write centroids to ``out_file``.

    ``ident`` is passed to ``--id``; ``--fasta_width`` is fixed at 0.
    """
    identity = str(ident)
    vsearch_args = [vsearch, '--cluster_fast', in_file]
    vsearch_args.extend(['--centroids', out_file])
    vsearch_args.extend(['--fasta_width', '0'])
    vsearch_args.extend(['--id', identity])

    run(vsearch_args, do_not_raise=True)
def trimmomatic_se(trimmomatic, adapters, in_file, out_file, stats_file,
                   threads, minlen):
    """Run Trimmomatic in single-end (``SE``) mode through ``java -jar``.

    ``adapters`` is used in the ``ILLUMINACLIP`` step, ``minlen`` in the
    ``MINLEN`` step, and the summary is written to ``stats_file``. The
    remaining trimming steps use fixed settings.
    """
    launcher = ['java', '-jar', trimmomatic, 'SE']

    general_opts = ['-threads', str(threads),
                    '-phred33',
                    '-summary', stats_file]

    trimming_steps = ['ILLUMINACLIP:' + adapters + ':2:30:10',
                      'SLIDINGWINDOW:4:20',
                      'LEADING:20',
                      'TRAILING:20',
                      'MINLEN:' + str(minlen)]

    cmd = launcher + general_opts + [in_file, out_file] + trimming_steps
    run(cmd, do_not_raise=True)
def dep_check_sra_toolkit(dir_dep, os_id, dist_id, debian_dists, redhat_dists,
                          force):
    """Locate a working ``fasterq-dump``, downloading SRA Toolkit if needed.

    Lookup order:
      1. ``fasterq-dump`` on ``PATH`` (skipped when ``force`` is ``True``).
      2. A copy previously unpacked under ``dir_dep``.
      3. A fresh download of SRA Toolkit 2.10.8 unpacked into ``dir_dep``.

    ``_ensure_vdb_cfg`` is called for whichever installation is tried.

    Args:
        dir_dep: Directory that holds downloaded dependencies.
        os_id: Operating system identifier; only 'mac' and 'linux' have a
            download URL.
        dist_id: Linux distribution identifier.
        debian_dists: Collection of Debian-like distribution identifiers.
        redhat_dists: Collection of Red Hat-like distribution identifiers.
        force: When ``True``, ``fasterq-dump`` on ``PATH`` is ignored.

    Returns:
        Path to ``fasterq-dump``, or ``None`` when it could not be obtained.
    """
    # Stays None when no download exists for this OS / distribution.
    url = None
    if os_id == 'mac':
        url = ('https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.10.8/'
               'sratoolkit.2.10.8-mac64.tar.gz')
    elif os_id == 'linux':
        if dist_id in debian_dists:
            url = ('https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.10.8/'
                   'sratoolkit.2.10.8-ubuntu64.tar.gz')
        elif dist_id in redhat_dists:
            url = ('https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.10.8/'
                   'sratoolkit.2.10.8-centos_linux64.tar.gz')

    dnld_path = opj(dir_dep, 'sra-toolkit.tar.gz')

    fasterq_dump = None
    try:
        if force is True:
            raise RuntimeError('Forced to skip SRA Toolkit found on PATH.')
        fasterq_dump = which('fasterq-dump')
        # Parent of the directory that contains fasterq-dump.
        # str.strip('bin') is not used because it removes any of the
        # characters 'b', 'i', 'n' from both ends, not the 'bin' suffix.
        dir_bin = dirname(dirname(fasterq_dump))
        _ensure_vdb_cfg(dir_bin)
        run(fasterq_dump)
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'sratoolkit'))
            _ensure_vdb_cfg(dir_bin)
            fasterq_dump = opj(dir_bin, 'bin', 'fasterq-dump')
            run(fasterq_dump)
        except Exception:
            Log.wrn('SRA Toolkit was not found on this system, trying to '
                    'download.')
            if url is None:
                Log.err('SRA Toolkit cannot be downloaded automatically for '
                        'this platform.')
                return None
            download_file(url, dnld_path)
            # The archive is closed even if extraction fails.
            with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                tar_ref.extractall(dir_dep)

            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'sratoolkit'))
            fasterq_dump = opj(dir_bin, 'bin', 'fasterq-dump')

            _ensure_vdb_cfg(dir_bin)

            if not ope(fasterq_dump):
                Log.err('Could not download SRA Toolkit.')
                return None

    v = get_dep_version([fasterq_dump, '--version'], r':\s([\d\.]*)')
    if v == '?':
        # Second pattern is tried when the first one yields '?'.
        v = get_dep_version([fasterq_dump, '--version'],
                            r'version\s([\d\.]*)')
    Log.msg('fasterq-dump is available:', v + ' ' + fasterq_dump)

    return fasterq_dump
def run_rcorrector_pe(rcorrector, in_file_1, in_file_2, out_dir, threads,
                      dir_temp):
    """Run Rcorrector on a pair of read files.

    The command is executed with ``dir_temp`` as the working directory and
    uses fixed ``-k 20``, ``-maxcorK 4`` and ``-wk 0.90`` settings.
    """
    options = (
        ('-t', str(threads)),
        ('-1', in_file_1),
        ('-2', in_file_2),
        ('-k', '20'),
        ('-maxcorK', '4'),
        ('-wk', '0.90'),
        ('-od', out_dir),
    )

    cmd = [rcorrector]
    for flag, value in options:
        cmd.extend((flag, value))

    run(cmd, cwd=dir_temp, do_not_raise=True)
def _ensure_vdb_cfg(dir_bin):
    """
    Create the SRA Toolkit configuration files so that the user does not
    have to run 'vdb-config --interactive' by hand.

    Works around this message from an unconfigured installation:

    This sra toolkit installation has not been configured.
    Before continuing, please run: vdb-config --interactive
    For more information, see https://www.ncbi.nlm.nih.gov/sra/docs/sra-cloud/

    ``dir_bin`` is the directory that contains the ``bin`` directory holding
    ``vdb-config``. The text 'x' is handed to ``run`` as ``in_txt``
    (presumably fed to the program's standard input -- confirm in ``run``).
    """
    cmd = [opj(dir_bin, 'bin', 'vdb-config'), '--interactive']
    run(cmd, in_txt='x', do_not_raise=True)
def dep_check_kakapolib(force=False, quiet=False):
    """Load the kakapolib shared library, compiling it first if it is missing.

    Args:
        force: Accepted but not used by this function.
        quiet: When ``False``, progress messages are logged.

    Returns:
        A ``ctypes.CDLL`` handle for ``KAKAPOLIB``, or ``None`` when the
        library file does not exist after the compilation attempt.
    """
    lib_path = KAKAPOLIB
    verbose = quiet is False

    if not ope(lib_path):
        if verbose:
            Log.wrn('Compiling kakapolib.')
        run(['make', 'install'], cwd=DIR_C_SRC)

    # Guard clause: nothing to load if the library file is still missing.
    if not ope(lib_path):
        Log.err('Compilation of kakapolib failed.')
        return None

    if verbose:
        Log.msg('kakapolib is available:', lib_path)

    return ctypes.CDLL(lib_path)
def run_bowtie2_se(bowtie2, input_file, output_file, output_file_un,
                   sam_output_file, index, threads, dir_temp):
    """Run bowtie2 on a single unpaired read file.

    ``output_file`` is given to ``--al``, ``output_file_un`` to ``--un`` and
    ``sam_output_file`` to ``-S``. The command is executed with ``dir_temp``
    as the working directory.
    """
    alignment_opts = ['--threads', str(threads),
                      '--very-sensitive',
                      '--phred33',
                      '--no-unal',
                      '--no-mixed',
                      '--no-discordant',
                      '-a',
                      '--rdg', '1000,1000',
                      '--rfg', '1000,1000']

    io_opts = ['-x', index,
               '-U', input_file,
               '--al', output_file,
               '--un', output_file_un,
               '-S', sam_output_file]

    run([bowtie2] + alignment_opts + io_opts, cwd=dir_temp,
        do_not_raise=True)
def dep_check_bowtie2(dir_dep, os_id, force):
    """Locate working ``bowtie2`` and ``bowtie2-build``, downloading if needed.

    Lookup order:
      1. Executables on ``PATH`` (skipped when ``force`` is ``True``).
      2. A copy previously unpacked under ``dir_dep``.
      3. A fresh download of Bowtie 2 version 2.4.1 unpacked into ``dir_dep``.

    Args:
        dir_dep: Directory that holds downloaded dependencies.
        os_id: Operating system identifier; only 'mac' and 'linux' have a
            download URL.
        force: When ``True``, executables on ``PATH`` are ignored.

    Returns:
        tuple: ``(bowtie2, bowtie2_build)`` paths, or ``(None, None)`` when
        Bowtie 2 could not be obtained.
    """
    # Stays None when no download exists for this OS.
    url = None
    if os_id == 'mac':
        url = ('https://sourceforge.net/projects/bowtie-bio/files/bowtie2/'
               '2.4.1/bowtie2-2.4.1-macos-x86_64.zip/download')
    elif os_id == 'linux':
        url = ('https://sourceforge.net/projects/bowtie-bio/files/bowtie2/'
               '2.4.1/bowtie2-2.4.1-linux-x86_64.zip/download')

    dnld_path = opj(dir_dep, 'bowtie2.zip')

    try:
        if force is True:
            raise RuntimeError('Forced to skip Bowtie 2 found on PATH.')
        bowtie2 = which('bowtie2')
        bowtie2_build = which('bowtie2-build')
        run([bowtie2, '-h'])
        run([bowtie2_build, '-h'])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'bowtie2'))
            bowtie2 = opj(dir_bin, 'bowtie2')
            bowtie2_build = opj(dir_bin, 'bowtie2-build')
            run([bowtie2, '-h'])
            run([bowtie2_build, '-h'])
        except Exception:
            Log.wrn('Bowtie 2 was not found on this system, trying to '
                    'download.')
            if url is None:
                Log.err('Bowtie 2 cannot be downloaded automatically for '
                        'this operating system.')
                return None, None
            download_file(url, dnld_path)
            # The archive is closed even if extraction fails.
            with zipfile.ZipFile(dnld_path, 'r') as zip_ref:
                zip_ref.extractall(dir_dep)

            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'bowtie2'))
            bowtie2 = opj(dir_bin, 'bowtie2')
            bowtie2_build = opj(dir_bin, 'bowtie2-build')

            # Checked before chmod: chmod on a missing file would raise and
            # this error would never be reported.
            if not ope(bowtie2):
                Log.err('Could not download Bowtie 2.')
                return None, None

            bowtie2_execs = ('', '-align-l', '-align-l-debug', '-align-s',
                             '-align-s-debug', '-build', '-build-l',
                             '-build-l-debug', '-build-s', '-build-s-debug',
                             '-inspect', '-inspect-l', '-inspect-l-debug',
                             '-inspect-s', '-inspect-s-debug')

            # rwx for the owner, r-x for group and others.
            exec_mode = (stat.S_IRWXU |
                         stat.S_IRGRP | stat.S_IXGRP |
                         stat.S_IROTH | stat.S_IXOTH)

            for bt2exe in bowtie2_execs:
                bt2_path = bowtie2 + bt2exe
                # Executables absent from the archive are skipped.
                if ope(bt2_path):
                    chmod(bt2_path, exec_mode)

    regexp = r'^.*?version\s([\d\.]*)'
    v = get_dep_version([bowtie2, '--version'], regexp)
    Log.msg('bowtie2 is available:', v + ' ' + bowtie2)
    v = get_dep_version([bowtie2_build, '--version'], regexp)
    Log.msg('bowtie2-build is available:', v + ' ' + bowtie2_build)

    return bowtie2, bowtie2_build
def run_spades_se(spades, out_dir, input_file, threads, memory, rna):
    """Run SPAdes (``--only-assembler``) on a single read file.

    The SPAdes script is launched through the interpreter named by ``PY3``.
    ``--rna`` is appended when ``rna`` is truthy.
    """
    # Only the text before the first '.' of the memory value is used.
    mem_limit = str(memory).partition('.')[0]

    spades_args = [PY3, spades,
                   '-o', out_dir,
                   '-s', input_file,
                   '--only-assembler',
                   '--threads', str(threads),
                   '--memory', mem_limit,
                   '--phred-offset', '33']

    if rna:
        spades_args += ['--rna']

    run(spades_args, do_not_raise=True)
def seqtk_sample_reads(seqtk, in_file, out_file, n, seed=11):
    """Subsample reads with ``seqtk sample`` and write them to ``out_file``.

    Args:
        seqtk: Path to the seqtk executable.
        in_file: Read file to sample from.
        out_file: File that receives the captured standard output.
        n: Fraction of reads or number of reads to sample. Numeric values
            are accepted; they are converted to text for the command line.
        seed: Value passed to ``-s``.
    """
    # n can be a fraction or a number of sequences to sample
    # Command-line arguments must be text, so n is converted like seed.
    cmd = [seqtk, 'sample', '-2', '-s', str(seed), in_file, str(n)]
    # out is stored in memory, could use a lot of RAM
    out = run(cmd, do_not_raise=True)
    with open(out_file, mode='w') as f:
        f.write(out.stdout)
def run_kraken_se(kraken, db, in_file, out_class_file, out_unclass_file,
                  report_file, confidence, threads, dir_temp):
    """Run Kraken on a single read file.

    Two optional flags are placed directly after the executable: the value
    returned by ``_use_memory_mapping(db)`` when it is not ``None``, and
    ``--gzip-compressed`` when ``splitext_gz(in_file)`` reports an extension
    in its third element. The command is executed with ``dir_temp`` as the
    working directory.
    """
    options = ['--db', db,
               '--threads', str(threads),
               '--confidence', str(confidence),
               '--output', '-',
               '--report', report_file,
               '--use-names',
               '--classified-out', out_class_file,
               '--unclassified-out', out_unclass_file,
               in_file]

    leading_flags = []

    mem_map_flag = _use_memory_mapping(db)
    if mem_map_flag is not None:
        leading_flags.append(mem_map_flag)

    _, _, gz_ext = splitext_gz(in_file)
    if gz_ext is not None:
        # The gzip flag goes in front of the memory-mapping flag.
        leading_flags.insert(0, '--gzip-compressed')

    run([kraken] + leading_flags + options, cwd=dir_temp, do_not_raise=True)
def run_vsearch(vsearch, ident, q_file, db_file, out_file, minlen):
    """Run ``vsearch --usearch_global`` with ``q_file`` against ``db_file``.

    Only the 'target' field is written to ``out_file``. ``ident`` is passed
    to ``--id`` and ``minlen`` to ``--minseqlength``; all other search
    settings are fixed.
    """
    io_opts = ['--usearch_global', q_file,
               '--db', db_file,
               '--userout', out_file,
               '--userfields', 'target']

    search_opts = ['--maxseqlength', '1000',
                   '--minseqlength', str(minlen),
                   '--threads', '0',
                   '--strand', 'both',
                   '--maxaccepts', '0',
                   '--maxrejects', '0',
                   '--iddef', '2',
                   '--maxsubs', '3',
                   '--maxgaps', '1',
                   '--target_cov', '0.33',
                   '--id', str(ident)]

    run([vsearch] + io_opts + search_opts, do_not_raise=True)
def dep_check_vsearch(dir_dep, os_id, dist_id, debian_dists, redhat_dists,
                      force):
    """Locate a working ``vsearch``, downloading it if needed.

    Lookup order:
      1. ``vsearch`` on ``PATH`` (skipped when ``force`` is ``True``).
      2. A copy previously unpacked under ``dir_dep``.
      3. A fresh download of Vsearch 2.15.0 unpacked into ``dir_dep``.

    Args:
        dir_dep: Directory that holds downloaded dependencies.
        os_id: Operating system identifier; only 'mac' and 'linux' have a
            download URL.
        dist_id: Linux distribution identifier.
        debian_dists: Collection of Debian-like distribution identifiers.
        redhat_dists: Collection of Red Hat-like distribution identifiers.
        force: When ``True``, ``vsearch`` on ``PATH`` is ignored.

    Returns:
        Path to ``vsearch``, or ``None`` when it could not be obtained.
    """
    # Stays None when no download exists for this OS / distribution.
    url = None
    if os_id == 'mac':
        url = ('https://github.com/torognes/vsearch/releases/download/v2.15.0/'
               'vsearch-2.15.0-macos-x86_64.tar.gz')
    elif os_id == 'linux':
        # The same archive is used for Debian-like and Red Hat-like systems.
        if dist_id in debian_dists or dist_id in redhat_dists:
            url = ('https://github.com/torognes/vsearch/releases/download/'
                   'v2.15.0/vsearch-2.15.0-linux-x86_64.tar.gz')

    dnld_path = opj(dir_dep, 'vsearch.tar.gz')

    try:
        if force is True:
            raise RuntimeError('Forced to skip Vsearch found on PATH.')
        vsearch = which('vsearch')
        run(vsearch)
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'vsearch'))
            vsearch = opj(dir_bin, 'bin', 'vsearch')
            run(vsearch)
        except Exception:
            Log.wrn(
                'Vsearch was not found on this system, trying to download.')
            if url is None:
                Log.err('Vsearch cannot be downloaded automatically for '
                        'this platform.')
                return None
            download_file(url, dnld_path)
            # The archive is closed even if extraction fails.
            with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                tar_ref.extractall(dir_dep)
            try:
                dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'vsearch'))
                vsearch = opj(dir_bin, 'bin', 'vsearch')
                if not ope(vsearch):
                    Log.err('Could not download Vsearch.')
                    return None
                else:
                    run(vsearch)
            except Exception:
                Log.err('Vsearch was downloaded, but does not execute.')
                Log.msg('Try downloading and installing it manually from: '
                        'https://github.com/torognes/vsearch')
                return None

    v = get_dep_version([vsearch, '-version'], r'vsearch\sv([\d\.]*)')
    Log.msg('Vsearch is available:', v + ' ' + vsearch)

    return vsearch
def dep_check_spades(dir_dep, os_id, force):
    """Locate a working ``spades.py``, downloading SPAdes if needed.

    Lookup order:
      1. ``spades.py`` on ``PATH`` (skipped when ``force`` is ``True``).
      2. A copy previously unpacked under ``dir_dep``.
      3. A fresh download of SPAdes 3.14.1 unpacked into ``dir_dep``.

    Every check runs the script through the interpreter named by ``PY3``.

    Args:
        dir_dep: Directory that holds downloaded dependencies.
        os_id: Operating system identifier; only 'mac' and 'linux' have a
            download URL.
        force: When ``True``, ``spades.py`` on ``PATH`` is ignored.

    Returns:
        Path to ``spades.py``, or ``None`` when it could not be obtained.
    """
    # Stays None when no download exists for this OS.
    url = None
    if os_id == 'mac':
        url = ('http://cab.spbu.ru/files/release3.14.1/'
               'SPAdes-3.14.1-Darwin.tar.gz')
    elif os_id == 'linux':
        url = ('http://cab.spbu.ru/files/release3.14.1/'
               'SPAdes-3.14.1-Linux.tar.gz')

    dnld_path = opj(dir_dep, 'SPAdes.tar.gz')

    try:
        if force is True:
            raise RuntimeError('Forced to skip SPAdes found on PATH.')
        spades = which('spades.py')
        run([PY3, spades])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'SPAdes'))
            spades = opj(dir_bin, 'bin', 'spades.py')
            run([PY3, spades])
        except Exception:
            Log.wrn('SPAdes was not found on this system, trying to download.')
            try:
                if url is None:
                    raise RuntimeError(
                        'No SPAdes download for this operating system.')
                download_file(url, dnld_path)
                # The archive is closed even if extraction fails.
                with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                    tar_ref.extractall(dir_dep)
            except Exception:
                Log.err('Could not download SPAdes.')
                return None

            try:
                dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'SPAdes'))
                spades = opj(dir_bin, 'bin', 'spades.py')
                # The script is always started via PY3, so its shebang line
                # is left untouched.
                if ope(spades):
                    run([PY3, spades])
                else:
                    Log.err('Could not download SPAdes.')
                    return None
            except Exception:
                Log.err('SPAdes was downloaded, but does not execute.')
                return None

    v = get_dep_version([PY3, spades, '--version'], r'^.*SPAdes.*v([\d\.]*)')
    Log.msg('SPAdes is available:', v + ' ' + spades)

    return spades
def dep_check_seqtk(dir_dep, force):
    """Locate a working ``seqtk``, downloading and compiling it if needed.

    Each candidate is exercised with ``seqtk seq -r`` on a small temporary
    FASTA file. Lookup order:
      1. ``seqtk`` on ``PATH`` (skipped when ``force`` is ``True``).
      2. ``<dir_dep>/seqtk-master/seqtk``.
      3. A fresh download of the master branch, compiled with ``make``; if
         that fails, ``CC=gcc`` is replaced by ``CC=cc`` in the Makefile and
         compilation is retried once.

    Args:
        dir_dep: Directory that holds downloaded dependencies.
        force: When ``True``, ``seqtk`` on ``PATH`` is ignored.

    Returns:
        Path to ``seqtk``, or ``None`` when compilation failed.
    """
    url = 'https://github.com/lh3/seqtk/archive/master.zip'
    dnld_path = opj(dir_dep, 'seqtk.zip')
    dir_bin = opj(dir_dep, 'seqtk-master')

    # The context manager closes (and thereby deletes) the temporary file on
    # every exit path, including exceptions raised while downloading.
    with NamedTemporaryFile() as fp:
        fp.write(str.encode('>seq' + lns + 'ATGC'))
        fp.seek(0)
        cmd = ['', 'seq', '-r', fp.name]

        try:
            if force is True:
                raise RuntimeError('Forced to skip Seqtk found on PATH.')
            seqtk = which('seqtk')
            # which() returns None when nothing is found; fall through to
            # the local copy instead of reporting None as available.
            if seqtk is None:
                raise RuntimeError('Seqtk was not found on PATH.')
            cmd[0] = seqtk
            run(cmd, do_not_raise=True)
        except Exception:
            try:
                seqtk = opj(dir_bin, 'seqtk')
                cmd[0] = seqtk
                run(cmd, do_not_raise=True)
            except Exception:
                Log.wrn('Seqtk was not found on this system, trying to '
                        'download.')
                download_file(url, dnld_path)
                # The archive is closed even if extraction fails.
                with zipfile.ZipFile(dnld_path, 'r') as zip_ref:
                    zip_ref.extractall(dir_dep)
                try:
                    Log.wrn('Compiling Seqtk.')
                    run('make', cwd=dir_bin)
                    run(cmd, do_not_raise=True)
                except Exception:
                    replace_line_in_file(opj(dir_bin, 'Makefile'),
                                         'CC=gcc', 'CC=cc')
                    try:
                        run('make', cwd=dir_bin)
                        run(cmd, do_not_raise=True)
                    except Exception:
                        Log.err(
                            'Something went wrong while trying to compile '
                            'Seqtk.')
                        Log.msg('Try downloading and installing it manually '
                                'from: https://github.com/lh3/seqtk')
                        return None

    v = get_dep_version([seqtk], r'Version\:\s([\d\w\.\-]*)')
    Log.msg('Seqtk is available:', v + ' ' + seqtk)

    return seqtk
def seqtk_fq_to_fa(seqtk, in_file, out_file):
    """Run ``seqtk seq -A`` on ``in_file`` and save its output in ``out_file``.

    The complete standard output is captured in memory before it is written,
    which can use a lot of RAM for large inputs.
    """
    seqtk_args = [seqtk, 'seq', '-A', in_file]
    result = run(seqtk_args, do_not_raise=True)

    with open(out_file, mode='w') as handle:
        handle.write(result.stdout)
def seqtk_extract_reads(seqtk, in_file, out_file, ids_file):
    """Run ``seqtk subseq`` and save the selected reads in ``out_file``.

    ``ids_file`` is passed as the list argument of ``seqtk subseq``. The
    complete standard output is captured in memory before it is written,
    which can use a lot of RAM for large inputs.
    """
    seqtk_args = [seqtk, 'subseq', in_file, ids_file]
    result = run(seqtk_args, do_not_raise=True)

    with open(out_file, mode='w') as handle:
        handle.write(result.stdout)
def dnld_sra_fastq_files(sras, sra_runs_info, dir_fq_data, fasterq_dump,
                         threads, dir_temp):
    """Download FASTQ files for SRA runs that are not available locally.

    For every accession in ``sras`` the expected FASTQ paths under
    ``dir_fq_data`` are recorded. Missing files are fetched with
    ``fasterq-dump --split-3``; paired files are moved to ``_R1`` / ``_R2``
    (and ``_R3`` for the 'paired_unp' layout) names, and the reads in freshly
    downloaded files are renamed with ``rename_fq_seqs``.

    Args:
        sras: Sequence of SRA run accessions.
        sra_runs_info: Mapping from accession to a run-info mapping with the
            keys 'LibraryLayout', 'KakapoLibraryLayout',
            'KakapoSampleBaseName', 'TaxID' and 'avgLength'.
        dir_fq_data: Directory in which FASTQ files are stored.
        fasterq_dump: Path to ``fasterq-dump``, or ``None``.
        threads: Thread count; twice this value is given to ``--threads``.
        dir_temp: Temporary directory given to ``--temp``; it is removed
            before exiting when the download keeps failing.

    Returns:
        tuple: ``(se_fastq_files, pe_fastq_files, sra_runs_info)``. The two
        dicts are keyed by sample base name; each value has the keys 'path',
        'src', 'avg_len' and 'tax_id'.

    Exits the process with status 1 when ``fasterq_dump`` is ``None`` while
    there is something to download, or when a download keeps failing.
    """
    if len(sras) > 0:
        if fasterq_dump is None:
            Log.err('fasterq-dump from SRA Toolkit is not available. ' +
                    'Cannot continue. Exiting.')
            # Non-zero status: this is a failure, same as the retry limit.
            exit(1)

        print()
        Log.inf('Downloading SRA read data.')

    se_fastq_files = {}
    pe_fastq_files = {}

    for sra in sras:
        sra_run_info = sra_runs_info[sra]
        sra_lib_layout = sra_run_info['LibraryLayout'].lower()
        sra_lib_layout_k = sra_run_info['KakapoLibraryLayout'].lower()
        sample_base_name = sra_run_info['KakapoSampleBaseName']
        sra_taxid = int(sra_run_info['TaxID'])
        avg_len = int(sra_run_info['avgLength'])

        # The single-end branch takes precedence over the paired branch.
        is_single = sra_lib_layout == 'single' or sra_lib_layout_k == 'single'
        is_paired = (not is_single) and sra_lib_layout == 'paired'
        has_unpaired = sra_lib_layout_k == 'paired_unp'

        sra_dnld_needed = False

        if is_single:
            se_file = opj(dir_fq_data, sra + '.fastq')
            se_fastq_files[sample_base_name] = {'path': se_file}
            se_fastq_files[sample_base_name]['src'] = 'sra'
            se_fastq_files[sample_base_name]['avg_len'] = avg_len
            se_fastq_files[sample_base_name]['tax_id'] = sra_taxid
            if not ope(se_file):
                sra_dnld_needed = True

        elif is_paired:
            pe_file_1 = opj(dir_fq_data, sra + '_1.fastq')
            pe_file_2 = opj(dir_fq_data, sra + '_2.fastq')
            pe_file_1_renamed = opj(dir_fq_data, sra + '_R1.fastq')
            pe_file_2_renamed = opj(dir_fq_data, sra + '_R2.fastq')
            pe_fastq_files[sample_base_name] = {
                'path': [pe_file_1_renamed, pe_file_2_renamed]
            }
            pe_fastq_files[sample_base_name]['src'] = 'sra'
            pe_fastq_files[sample_base_name]['avg_len'] = avg_len // 2
            pe_fastq_files[sample_base_name]['tax_id'] = sra_taxid
            if has_unpaired:
                pe_file_3 = opj(dir_fq_data, sra + '.fastq')
                pe_file_3_renamed = opj(dir_fq_data, sra + '_R3.fastq')
                pe_fastq_files[sample_base_name]['path'].append(
                    pe_file_3_renamed)
            if not ope(pe_file_1_renamed) or not ope(pe_file_2_renamed):
                sra_dnld_needed = True

        if not sra_dnld_needed:
            Log.msg('FASTQ reads are available locally:', sample_base_name)

        retry_count = 0
        while sra_dnld_needed:

            if retry_count > 50:
                Log.err('Download failed. Exiting.')
                rmtree(dir_temp)
                exit(1)

            elif retry_count > 0:
                Log.wrn('Download failed. Retrying.')
                sleep(2)

            retry_count += 1

            Log.msg('Downloading FASTQ reads for:', sample_base_name)

            cmd = [fasterq_dump,
                   '--threads', str(threads * 2),
                   '--split-3',
                   '--bufsize', '819200',
                   '--outdir', dir_fq_data,
                   '--temp', dir_temp, sra]

            run(cmd, do_not_raise=True)

            # A missing expected file means the download failed: try again.
            if is_single:
                if not ope(se_file):
                    continue

            elif is_paired:
                if not ope(pe_file_1) or not ope(pe_file_2):
                    continue
                else:
                    move(pe_file_1, pe_file_1_renamed)
                    move(pe_file_2, pe_file_2_renamed)

                if has_unpaired:
                    if not ope(pe_file_3):
                        continue
                    else:
                        move(pe_file_3, pe_file_3_renamed)

            sra_dnld_needed = False

            if is_single:
                if ope(se_file):
                    Log.msg('Renaming FASTQ reads in:', se_file)
                    rename_fq_seqs(se_file, sra, '1:N:0')

            elif is_paired:
                if ope(pe_file_1_renamed):
                    Log.msg('Renaming FASTQ reads in:', pe_file_1_renamed)
                    rename_fq_seqs(pe_file_1_renamed, sra, '1:N:0')
                if ope(pe_file_2_renamed):
                    Log.msg('Renaming FASTQ reads in:', pe_file_2_renamed)
                    rename_fq_seqs(pe_file_2_renamed, sra, '2:N:0')
                if has_unpaired:
                    if ope(pe_file_3_renamed):
                        Log.msg('Renaming FASTQ reads in:', pe_file_3_renamed)
                        rename_fq_seqs(pe_file_3_renamed, sra + '_unpaired',
                                       '1:N:0')

    return se_fastq_files, pe_fastq_files, sra_runs_info
def dep_check_rcorrector(dir_dep, force):
    """Locate a working Rcorrector, downloading and compiling it if needed.

    Rcorrector is accepted from ``PATH`` only when ``jellyfish`` is also on
    ``PATH`` and ``force`` is not ``True``. Otherwise the copy under
    ``dir_dep`` is tried, and if either Rcorrector or jellyfish does not run
    from there, the sources are downloaded again and compiled with ``make``.

    NOTE: the control flow relies on bare ``raise`` statements and on
    variables assigned inside exception handlers; statement order matters.

    Args:
        dir_dep: Directory that holds downloaded dependencies.
        force: When ``True``, ``run_rcorrector.pl`` on ``PATH`` is ignored.

    Returns:
        Path to ``run_rcorrector.pl``, or ``None`` when compilation failed.
    """
    url = 'https://github.com/karolisr/Rcorrector/archive/master.tar.gz'
    dnld_path = opj(dir_dep, 'rcorrector.tar.gz')

    try:
        try:
            jellyfish = which('jellyfish')
            run([jellyfish, '--help'])
        except Exception:
            # jellyfish on PATH is unusable: point at the copy bundled with
            # the local Rcorrector directory, then re-raise so that the outer
            # handler takes over (Rcorrector on PATH is not checked).
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'Rcorrector'))
            jellyfish = opj(dir_bin, 'jellyfish', 'bin', 'jellyfish')
            raise
        if force is True:
            # Bare raise with no exception being handled raises RuntimeError,
            # which the outer handler catches.
            raise
        rcorrector = which('run_rcorrector.pl')
        run([rcorrector, '-version'])
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'Rcorrector'))
            try:
                rcorrector = opj(dir_bin, 'run_rcorrector.pl')
                run([rcorrector, '-version'])
            except Exception:
                Log.wrn('Rcorrector was not found on this system, trying to '
                        'download.')
                # Re-raised to reach the download-and-compile handler below.
                raise
            try:
                # jellyfish is whichever path was assigned above: the one
                # from PATH or the one inside the local Rcorrector directory.
                run([jellyfish, '--version'])
            except Exception:
                Log.wrn(
                    'jellyfish is required by Rcorrector, but was not found. '
                    'Trying to download and recompile Rcorrector and '
                    'jellyfish.')
                # Re-raised to reach the download-and-compile handler below.
                raise
        except Exception:
            # Start from a clean state: drop the old archive and the old
            # source directory before downloading again.
            if ope(dnld_path):
                remove(dnld_path)
            # Guard against deleting dir_dep itself; presumably get_dep_dir
            # returns '' when no matching directory exists -- TODO confirm.
            if dir_bin != opj(dir_dep, ''):
                rmtree(dir_bin)
            download_file(url, dnld_path)
            tar_ref = tarfile.open(dnld_path, 'r:gz')
            tar_ref.extractall(dir_dep)
            tar_ref.close()
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'Rcorrector'))
            try:
                Log.wrn('Compiling Rcorrector.')
                run('make', cwd=dir_bin)
                rcorrector = opj(dir_bin, 'run_rcorrector.pl')
                jellyfish = opj(dir_bin, 'jellyfish', 'bin', 'jellyfish')
                # rwx for the owner, r-x for group and others.
                chmod(
                    rcorrector,
                    stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
                    stat.S_IROTH | stat.S_IXOTH)
                run([rcorrector, '-version'])
                # Fall back to jellyfish on PATH when the build did not
                # produce a bundled jellyfish executable.
                if not ope(jellyfish):
                    jellyfish = which('jellyfish')
                run([jellyfish, '--version'])
            except Exception:
                Log.err('Something went wrong while trying to compile '
                        'Rcorrector.')
                Log.msg('Try downloading and installing it manually from: '
                        'https://github.com/karolisr/Rcorrector')
                return None

    v = get_dep_version([rcorrector, '-version'], r'^Rcorrector\sv([\d\.]*)')
    Log.msg('Rcorrector is available:', v + ' ' + rcorrector)

    return rcorrector
def build_bt2_index(bowtie2_build, input_files, output_path, threads):
    """Run ``bowtie2-build`` on ``input_files`` to create an index.

    The input file paths are joined with commas into a single argument and
    ``output_path`` is passed as the last argument.
    """
    reference_arg = ','.join(input_files)
    build_args = [bowtie2_build, '--threads', str(threads)]
    build_args.append(reference_arg)
    build_args.append(output_path)

    run(build_args, do_not_raise=True)
def dep_check_kraken2(dir_dep, os_id, release_name, force):
    """Locate working ``kraken2`` / ``kraken2-build``, compiling if needed.

    Lookup order:
      1. Executables on ``PATH`` (skipped when ``force`` is ``True``).
      2. A copy previously built under ``dir_dep``.
      3. A fresh download of the sources, compiled in up to three attempts:
         with the unmodified compiler flags, with flags that point at a
         libomp obtained through ``brew_get``, and finally without OpenMP.

    An installation counts as working when the ``classify`` binary prints
    its 'mandatory filename' message and both scripts accept ``--help``.

    Args:
        dir_dep: Directory that holds downloaded dependencies.
        os_id: Operating system identifier ('mac' or 'linux' select the
            libomp compiler flags).
        release_name: Passed through to ``brew_get``.
        force: When ``True``, executables on ``PATH`` are ignored.

    Returns:
        tuple: ``(kraken2, kraken2_build)`` paths, or ``(None, None)`` when
        Kraken2 could not be compiled.
    """
    url = 'https://github.com/karolisr/kraken2/archive/master.tar.gz'
    dnld_path = opj(dir_dep, 'kraken2.tar.gz')

    def _assert_functional(classify_bin, kraken2, kraken2_build):
        """Raise unless classify, kraken2 and kraken2-build all respond."""
        res = run([classify_bin], do_not_raise=True)
        if not res.stderr.startswith('classify: mandatory filename'):
            raise RuntimeError('Kraken2 classify binary is not functional.')
        run([kraken2, '--help'])
        run([kraken2_build, '--help'])

    try:
        if force is True:
            raise RuntimeError('Forced to skip Kraken2 found on PATH.')
        kraken2 = which('kraken2')
        kraken2_build = which('kraken2-build')
        dir_bin = dirname(kraken2)
        classify_bin = opj(dir_bin, 'classify')
        _assert_functional(classify_bin, kraken2, kraken2_build)
    except Exception:
        try:
            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'kraken2'))
            kraken2 = opj(dir_bin, 'bin', 'kraken2')
            kraken2_build = opj(dir_bin, 'bin', 'kraken2-build')
            classify_bin = opj(dir_bin, 'bin', 'classify')
            _assert_functional(classify_bin, kraken2, kraken2_build)
        except Exception:
            Log.wrn('Kraken2 was not found on this system, trying to '
                    'download.')

            if ope(dnld_path):
                remove(dnld_path)

            download_file(url, dnld_path)
            # The archive is closed even if extraction fails.
            with tarfile.open(dnld_path, 'r:gz') as tar_ref:
                tar_ref.extractall(dir_dep)

            dir_bin = opj(dir_dep, get_dep_dir(dir_dep, 'kraken2'))
            classify_bin = opj(dir_bin, 'bin', 'classify')
            kraken2 = opj(dir_bin, 'bin', 'kraken2')
            kraken2_build = opj(dir_bin, 'bin', 'kraken2-build')

            makefile = opj(dir_bin, 'src', 'Makefile')
            # Quote the destination directory in the Makefile's copy command.
            replace_line_in_file(makefile,
                                 'cp $(PROGS) $(KRAKEN2_DIR)/',
                                 'cp $(PROGS) "$(KRAKEN2_DIR)"/')

            # The CXXFLAGS line currently present in the Makefile. Tracked so
            # that attempt 3 can replace the right line even when attempt 2
            # failed before it changed the Makefile.
            makefile_cxx_flags = 'CXXFLAGS = -fopenmp -Wall -std=c++11 -O3'

            try:
                Log.wrn('Compiling Kraken2 Attempt 1')
                run(['./install_kraken2.sh', 'bin'], cwd=dir_bin)
                _assert_functional(classify_bin, kraken2, kraken2_build)
            except Exception:
                try:
                    Log.wrn('Compiling Kraken2 Attempt 2')

                    dir_libomp = opj(dir_dep, 'libomp')

                    if ope(dir_libomp):
                        rmtree(dir_libomp)

                    libomp_fp, v = brew_get('libomp', os_id, release_name,
                                            dir_dep)

                    with tarfile.open(libomp_fp, 'r:gz') as tar_ref:
                        tar_ref.extractall(dir_dep)

                    dir_libomp_l = opj(dir_libomp, v, 'lib')
                    dir_libomp_i = opj(dir_libomp, v, 'include')

                    if os_id == 'mac':
                        # Changes the shared library identification name of a
                        # dynamic shared library.
                        dylib_f = opj(dir_libomp_l, 'libomp.dylib')

                        chmod(
                            dylib_f,
                            stat.S_IRWXU | stat.S_IRUSR | stat.S_IWUSR |
                            stat.S_IRGRP | stat.S_IWGRP |
                            stat.S_IROTH | stat.S_IWOTH)

                        cmd = ['install_name_tool', '-id', dylib_f, dylib_f]
                        run(cmd)

                        cxx_flags = ('CXXFLAGS = -L{} -I{} -Xpreprocessor '
                                     '-fopenmp -lomp -Wall -std=c++11 -O3')

                    elif os_id == 'linux':
                        cxx_flags = ('CXXFLAGS = -L{} -I{} -fopenmp -lomp '
                                     '-static -Wall -std=c++11 -O3')

                    # For any other os_id cxx_flags is unbound here; the
                    # resulting error sends control to attempt 3.
                    cxx_flags = cxx_flags.format(dir_libomp_l, dir_libomp_i)

                    replace_line_in_file(makefile, makefile_cxx_flags,
                                         cxx_flags)
                    makefile_cxx_flags = cxx_flags

                    run(['./install_kraken2.sh', 'bin'], cwd=dir_bin)
                    _assert_functional(classify_bin, kraken2, kraken2_build)
                except Exception:
                    try:
                        Log.wrn('Compiling Kraken2 Attempt 3')
                        # Last resort: compile without OpenMP.
                        replace_line_in_file(
                            makefile,
                            makefile_cxx_flags,
                            'CXXFLAGS = -Wall -std=c++11 -O3')
                        run(['./install_kraken2.sh', 'bin'], cwd=dir_bin)
                        _assert_functional(classify_bin, kraken2,
                                           kraken2_build)
                    except Exception:
                        # Best effort; the existence check below decides
                        # whether the installation is reported as failed.
                        pass

            if not ope(kraken2):
                Log.err('Something went wrong while trying to compile '
                        'Kraken2.')
                Log.msg('Try downloading and installing it manually from: '
                        'https://github.com/karolisr/kraken2')
                return None, None

    regexp = r'^.*?version\s([\d\.\-A-Za-z]*)'
    v = get_dep_version([kraken2, '--version'], regexp)
    Log.msg('kraken2 is available:', v + ' ' + kraken2)
    v = get_dep_version([kraken2_build, '--version'], regexp)
    Log.msg('kraken2-build is available:', v + ' ' + kraken2_build)

    return kraken2, kraken2_build