def trim_primers_and_adapters(fasta_in, fasta_out, adapters_fa, primers_fa, min_length=100, min_dist_to_end=25, window_length=10, min_pc=90):
    '''Trim adapters and/or primers off contig ends.

    At least one of adapters_fa and primers_fa must be given. When both are
    given they are concatenated (adapters first) into a single query file
    inside a temporary directory created under the current working directory.
    The actual trimming is delegated to _trim_ends.

    Args:
        fasta_in: input file, passed unchanged to _trim_ends.
        fasta_out: output file, passed unchanged to _trim_ends.
        adapters_fa: adapter sequences file, or None.
        primers_fa: primer sequences file, or None.
        min_length, min_dist_to_end, window_length, min_pc: forwarded
            unchanged as keyword arguments to _trim_ends.

    Raises:
        AssertionError: if adapters_fa and primers_fa are both None.
    '''
    # Kept as an assert so the exception type seen by callers is unchanged.
    assert adapters_fa is not None or primers_fa is not None
    tmpdir = tempfile.mkdtemp(prefix='tmp.trim.', dir=os.getcwd())

    # try/finally guarantees the temporary directory is removed even when
    # concatenation or trimming raises.
    try:
        if adapters_fa is None:
            trim_query = primers_fa
        elif primers_fa is None:
            trim_query = adapters_fa
        else:
            tmp_prefix = os.path.join(tmpdir, 'out')
            trim_query = tmp_prefix + '.query.fa'
            common.syscall('cat ' + adapters_fa + ' ' + primers_fa + ' > ' + trim_query)

        _trim_ends(
            fasta_in,
            fasta_out,
            trim_query,
            min_length=min_length,
            min_dist_to_end=min_dist_to_end,
            window_length=window_length,
            min_pc=min_pc,
        )
    finally:
        shutil.rmtree(tmpdir)
def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail, threads=1):
    '''Write a bash script that counts kmers with kmc, then run it.

    The script raises the open-file limit, runs kmc on reads (output
    database kmc_out, working directory $PWD), dumps the database with
    kmc_dump and writes the dump, sorted with "sort -k2nr", to outfile.

    Args:
        script: name of the bash script to write and run.
        reads: reads file given to kmc (run with -fa).
        outfile: file that receives the sorted kmc dump.
        kmer: value for kmc -k.
        min_count: value for kmc -ci.
        max_count: value used for both kmc -cs and -cx.
        m_option: value for kmc -m.
        verbose: when >= 2 the kmc command is echoed to stdout and kmc's
            own output is not redirected to /dev/null.
        allow_fail: forwarded to common.syscall.
        threads: value for kmc -sf.

    Returns:
        Whatever common.syscall returns for running the script.
    '''
    handle = pyfastaq.utils.open_file_write(script)

    # Each flag is glued directly to its value (e.g. " -k31"), no space.
    flag_values = (
        (' -m', m_option),
        (' -k', kmer),
        (' -sf', threads),
        (' -ci', min_count),
        (' -cs', max_count),
        (' -cx', max_count),
    )
    kmc_command = 'kmc -fa'
    for flag, value in flag_values:
        kmc_command += flag + str(value)
    kmc_command += ' ' + reads + ' kmc_out' + ' $PWD'

    echo_command = verbose >= 2
    if echo_command:
        kmc_line = kmc_command
    else:
        # Quiet mode: discard kmc's stdout.
        kmc_line = kmc_command + ' > /dev/null'

    script_lines = [
        'set -e',
        'ulimit -n 4096',
        kmc_line,
        'kmc_dump kmc_out kmc_out.dump',
        'sort -k2nr kmc_out.dump > ' + str(outfile),
    ]
    for line in script_lines:
        print(line, file=handle)

    if echo_command:
        print('run kmc:', os.getcwd(), kmc_command)

    pyfastaq.utils.close(handle)
    return common.syscall('bash ' + script, allow_fail=allow_fail)
def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail, threads=1):
    '''Write a bash script that counts kmers with kmc, then run it.

    The script runs kmc on reads (output database kmc_out, working directory
    $PWD), dumps the database with kmc_dump and writes the dump, sorted with
    "sort -k2nr", to outfile.

    Args:
        script: name of the bash script to write and run.
        reads: reads file given to kmc (run with -fa).
        outfile: file that receives the sorted kmc dump.
        kmer: value for kmc -k.
        min_count: value for kmc -ci.
        max_count: value used for both kmc -cs and -cx.
        m_option: value for kmc -m.
        verbose: when >= 2 the kmc command is echoed to stdout and kmc's
            own output is not redirected to /dev/null.
        allow_fail: forwarded to common.syscall.
        threads: value for kmc -sf. Defaults to 1, which reproduces the
            previously hard-coded value.

    Returns:
        Whatever common.syscall returns for running the script.
    '''
    f = pyfastaq.utils.open_file_write(script)
    # Close the script handle even if building or writing the script fails.
    try:
        print('set -e', file=f)
        # Flags are glued directly to their values (e.g. " -k31").
        kmc_command = ''.join([
            'kmc -fa',
            ' -m', str(m_option),
            ' -k', str(kmer),
            ' -sf', str(threads),
            ' -ci', str(min_count),
            ' -cs', str(max_count),
            ' -cx', str(max_count),
            ' ', reads,
            ' kmc_out',
            ' $PWD'
        ])
        print(kmc_command, end='', file=f)
        if verbose >= 2:
            # Just terminate the line so kmc's output stays visible.
            print('', file=f)
            print('run kmc:', os.getcwd(), kmc_command)
        else:
            print(' > /dev/null', file=f)
        print('kmc_dump', 'kmc_out', 'kmc_out.dump', file=f)
        print('sort -k2nr', 'kmc_out.dump >', outfile, file=f)
    finally:
        pyfastaq.utils.close(f)
    return common.syscall('bash ' + script, allow_fail=allow_fail)
def run_nucmer(query, ref, outfile, min_id=95, min_length=100, breaklen=200):
    '''Align query against ref with nucmer and write filtered coords to outfile.

    A bash script is written and run inside a temporary directory created
    under the current working directory. The script runs, in order:
    "nucmer --maxmatch", "delta-filter" and "show-coords -dTlro", with the
    show-coords output redirected to outfile. The working directory is
    always restored and the temporary directory always removed, including
    when writing or running the script raises.

    Args:
        query: query sequence file (given to nucmer after ref).
        ref: reference sequence file.
        outfile: file that receives the show-coords output.
        min_id: value for delta-filter -i.
        min_length: value for delta-filter -l.
        breaklen: value for nucmer -b.
    '''
    # Absolute paths are needed because the script runs from inside tmpdir.
    query = os.path.abspath(query)
    ref = os.path.abspath(ref)
    outfile = os.path.abspath(outfile)
    original_dir = os.getcwd()
    tmpdir = tempfile.mkdtemp(prefix='tmp.run_nucmer.', dir=original_dir)

    try:
        os.chdir(tmpdir)
        script = 'run_nucmer.sh'
        f = pyfastaq.utils.open_file_write(script)
        try:
            print('nucmer --maxmatch -p p -b', breaklen, ref, query, file=f)
            print('delta-filter -i', min_id, '-l', min_length, 'p.delta > p.delta.filter', file=f)
            print('show-coords -dTlro p.delta.filter >', outfile, file=f)
        finally:
            pyfastaq.utils.close(f)
        common.syscall('bash ' + script)
    finally:
        # Leave the temp directory before deleting it; without this an
        # exception would strand the process in tmpdir.
        os.chdir(original_dir)
        shutil.rmtree(tmpdir)
def run_trimmomatic(reads1, reads2, outprefix, trimmo_jar, adapters, minlen=50, verbose=0, threads=1, qual_trim=''):
    '''Run Trimmomatic in PE mode and keep only the paired output files.

    Paired reads are written to outprefix + '_1.fq' and outprefix + '_2.fq'.
    The two unpaired output files are deleted once the command has run.

    Args:
        reads1: first reads file.
        reads2: second reads file.
        outprefix: prefix of all output file names.
        trimmo_jar: Trimmomatic jar file given to "java -jar".
        adapters: adapters file; its absolute path is used in ILLUMINACLIP.
        minlen: value for the MINLEN step.
        verbose: when truthy, the command is printed before it is run.
        threads: value for -threads.
        qual_trim: extra text placed between the ILLUMINACLIP and MINLEN
            steps; may be empty.
    '''
    unpaired_fwd = outprefix + '.unpaired_1.fq'
    unpaired_rev = outprefix + '.unpaired_2.fq'
    illuminaclip = 'ILLUMINACLIP:' + os.path.abspath(adapters) + ':2:10:7:1'

    # qual_trim stays in the list even when empty so the joined command is
    # exactly what it has always been.
    cmd_parts = [
        'java -Xmx1000m -jar',
        trimmo_jar,
        'PE',
        '-threads',
        str(threads),
        reads1,
        reads2,
        outprefix + '_1.fq',
        unpaired_fwd,
        outprefix + '_2.fq',
        unpaired_rev,
        illuminaclip,
        qual_trim,
        'MINLEN:' + str(minlen),
    ]
    cmd = ' '.join(cmd_parts)

    if verbose:
        print('Run trimmomatic:', cmd)

    common.syscall(cmd)

    for leftover in (unpaired_fwd, unpaired_rev):
        os.unlink(leftover)
def map_reads(reads_fwd, reads_rev, ref_fa, out_prefix, index_k=15, index_s=3, threads=1, max_insert=1000, minid=0.5, verbose=0, required_flag=None, sort=False, exclude_flag=None, mate_ref=None, extra_smalt_map_ops=None):
    '''Map reads to ref_fa with smalt and write out_prefix + '.bam'.

    Builds a smalt index of ref_fa, maps the reads, and pipes the result
    through "samtools view" into a BAM file. With sort=True the mapping is
    first written to out_prefix + '.unsorted.bam', then sorted and indexed
    with samtools. The smalt index files are removed afterwards, whether or
    not the commands succeeded.

    Args:
        reads_fwd: forward (or only) reads file.
        reads_rev: reverse reads file, or None to map reads_fwd unpaired.
        ref_fa: reference file to index and map against.
        out_prefix: prefix of all files written.
        index_k: value for smalt index -k.
        index_s: value for smalt index -s.
        threads: passed to smalt map as -n only when greater than 1; also
            caps at 4 (and floors at 1) the samtools sort thread count.
        max_insert: value for smalt map -i (paired reads only).
        minid: value for smalt map -y.
        verbose: when >= 2, each command is printed before it is run.
        required_flag: if not None, value for samtools view -f.
        sort: if true, sort and index the BAM.
        exclude_flag: if not None, value for samtools view -F.
        mate_ref: if not None, the mapper output is filtered with awk to
            lines whose 7th column equals this string.
        extra_smalt_map_ops: extra text placed right after "smalt map".
    '''
    if extra_smalt_map_ops is None:
        extra_smalt_map_ops = ''
    map_index = out_prefix + '.map_index'
    clean_files = [map_index + '.' + x for x in ['smi', 'sma']]
    index_cmd = ' '.join([
        'smalt index',
        '-k', str(index_k),
        '-s', str(index_s),
        map_index,
        ref_fa
    ])

    map_cmd = 'smalt map ' + extra_smalt_map_ops + ' '

    # depending on OS, -n can break smalt, so only use -n if it's > 1.
    if threads > 1:
        map_cmd += '-n ' + str(threads) + ' -O '

    if reads_rev is None:
        map_cmd += ' '.join([
            '-y', str(minid),
            map_index,
            reads_fwd,
        ])
    else:
        map_cmd += ' '.join([
            '-i', str(max_insert),
            '-y', str(minid),
            map_index,
            reads_fwd,
            reads_rev,
        ])

    if mate_ref is not None:
        map_cmd += r''' | awk '$7=="''' + mate_ref + '"\''

    map_cmd += ' | samtools view'

    if required_flag is not None:
        map_cmd += ' -f ' + str(required_flag)

    if exclude_flag is not None:
        map_cmd += ' -F ' + str(exclude_flag)

    final_bam = out_prefix + '.bam'
    if sort:
        intermediate_bam = out_prefix + '.unsorted.bam'
    else:
        intermediate_bam = final_bam

    map_cmd += ' -bS -T ' + ref_fa + ' - > ' + intermediate_bam

    try:
        # Echo each command before running it so that a failing command is
        # visible in the log.
        if verbose >= 2:
            print(' map reads. Index: ', index_cmd)
        common.syscall(index_cmd)

        if verbose >= 2:
            print(' map reads. Mapping:', map_cmd)
        common.syscall(map_cmd)

        if sort:
            # Clamp to 1..4 so that threads <= 0 cannot cause a division by
            # zero (or a negative -@ value) below.
            sort_threads = max(1, min(4, threads))
            thread_mem = int(500 / sort_threads)
            # NOTE(review): "samtools sort in.bam out_prefix" looks like the
            # older positional-prefix form of samtools sort - confirm against
            # the installed samtools version.
            sort_cmd = 'samtools sort -@' + str(sort_threads) + ' -m ' + str(thread_mem) + 'M ' + intermediate_bam + ' ' + out_prefix
            bam_index_cmd = 'samtools index ' + final_bam
            if verbose >= 2:
                print(' map reads. sort: ', sort_cmd)
            common.syscall(sort_cmd)
            if verbose >= 2:
                print(' map reads. index: ', bam_index_cmd)
            common.syscall(bam_index_cmd)
            # NOTE(review): the unsorted intermediate BAM is left on disk;
            # confirm whether any caller relies on it before removing it.
    finally:
        # Remove the smalt index files even on failure. They may not exist
        # if indexing itself failed, hence the existence check.
        for fname in clean_files:
            if os.path.exists(fname):
                os.unlink(fname)