Ejemplo n.º 1
0
def trim_primers_and_adapters(fasta_in,
                              fasta_out,
                              adapters_fa,
                              primers_fa,
                              min_length=100,
                              min_dist_to_end=25,
                              window_length=10,
                              min_pc=90):
    '''Trim adapters and/or primers off contig ends.

    At least one of adapters_fa and primers_fa must not be None. If only one
    is given it is used directly as the trimming query; if both are given
    they are concatenated (with the shell's cat) into a single query file
    inside a temporary directory.

    fasta_in, fasta_out, the query file and the min_length, min_dist_to_end,
    window_length and min_pc options are forwarded unchanged to _trim_ends(),
    which does the actual trimming.

    The temporary directory is created in the current working directory and
    is always removed before returning, including when an error is raised.
    '''
    assert adapters_fa is not None or primers_fa is not None
    tmpdir = tempfile.mkdtemp(prefix='tmp.trim.', dir=os.getcwd())

    # Everything after mkdtemp runs under try/finally so that a failure in
    # the cat call or in _trim_ends does not leave the directory behind.
    try:
        tmp_prefix = os.path.join(tmpdir, 'out')

        if adapters_fa is None:
            trim_query = primers_fa
        elif primers_fa is None:
            trim_query = adapters_fa
        else:
            # Both supplied: merge them into one query file.
            trim_query = tmp_prefix + '.query.fa'
            common.syscall('cat ' + adapters_fa + ' ' + primers_fa + ' > ' +
                           trim_query)

        _trim_ends(fasta_in,
                   fasta_out,
                   trim_query,
                   min_length=min_length,
                   min_dist_to_end=min_dist_to_end,
                   window_length=window_length,
                   min_pc=min_pc)
    finally:
        shutil.rmtree(tmpdir)
Ejemplo n.º 2
0
def trim_primers_and_adapters(fasta_in, fasta_out, adapters_fa, primers_fa, min_length=100, min_dist_to_end=25, window_length=10, min_pc=90):
    '''Trim adapters and/or primers off contig ends.

    adapters_fa and primers_fa are FASTA file names; at least one of them
    must not be None. When both are given they are concatenated into one
    query file in a temporary directory, otherwise the single file given is
    used as the query. The query and all remaining arguments are passed
    through to _trim_ends().

    The temporary directory (made in the current working directory) is
    removed on every exit path, so a failing command does not leak it.
    '''
    assert adapters_fa is not None or primers_fa is not None
    tmpdir = tempfile.mkdtemp(prefix='tmp.trim.', dir=os.getcwd())
    try:
        tmp_prefix = os.path.join(tmpdir, 'out')

        if adapters_fa is None:
            trim_query = primers_fa
        elif primers_fa is None:
            trim_query = adapters_fa
        else:
            trim_query = tmp_prefix + '.query.fa'
            common.syscall('cat ' + adapters_fa + ' ' + primers_fa + ' > ' + trim_query)

        _trim_ends(fasta_in, fasta_out, trim_query, min_length=min_length, min_dist_to_end=min_dist_to_end, window_length=window_length, min_pc=min_pc)
    finally:
        # Clean up even if cat or _trim_ends failed.
        shutil.rmtree(tmpdir)
Ejemplo n.º 3
0
def _run_kmc_with_script(script,
                         reads,
                         outfile,
                         kmer,
                         min_count,
                         max_count,
                         m_option,
                         verbose,
                         allow_fail,
                         threads=1):
    '''Write a bash script that runs kmc then kmc_dump, and execute it.

    The script is written to the file named by script. It runs kmc on reads
    (options -m, -k, -sf, -ci, -cs and -cx are taken from m_option, kmer,
    threads, min_count, max_count and max_count respectively), dumps the
    result with kmc_dump, and writes the dump sorted with "sort -k2nr" to
    outfile.

    If verbose >= 2 the kmc command is also printed to stdout together with
    the current working directory, and kmc's own output is left visible;
    otherwise kmc's stdout is redirected to /dev/null.

    Returns whatever common.syscall returns for the script; allow_fail is
    passed straight through to it.
    '''
    f = pyfastaq.utils.open_file_write(script)
    print('set -e', file=f)

    # Each option is glued to its value (e.g. "-k21"), so the pieces are
    # built first and then separated by single spaces.
    kmc_args = [
        'kmc -fa',
        '-m' + str(m_option),
        '-k' + str(kmer),
        '-sf' + str(threads),
        '-ci' + str(min_count),
        '-cs' + str(max_count),
        '-cx' + str(max_count),
        reads,
        'kmc_out',
        '$PWD',
    ]
    kmc_command = ' '.join(kmc_args)

    print('ulimit -n 4096', file=f)
    if verbose >= 2:
        print(kmc_command, file=f)
        print('run kmc:', os.getcwd(), kmc_command)
    else:
        print(kmc_command + ' > /dev/null', file=f)

    print('kmc_dump kmc_out kmc_out.dump', file=f)
    print('sort -k2nr kmc_out.dump >', outfile, file=f)
    pyfastaq.utils.close(f)
    return common.syscall('bash ' + script, allow_fail=allow_fail)
Ejemplo n.º 4
0
def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail, threads=1):
    '''Write a bash script that runs kmc then kmc_dump, and execute it.

    script:     name of the bash script to write and run.
    reads:      reads file name given to kmc.
    outfile:    receives the kmc_dump output sorted with "sort -k2nr".
    kmer:       value for kmc's -k option.
    min_count:  value for kmc's -ci option.
    max_count:  value used for both kmc's -cs and -cx options.
    m_option:   value for kmc's -m option.
    verbose:    if >= 2, the kmc command and current directory are printed
                to stdout and kmc's output is not hidden; otherwise kmc's
                stdout is redirected to /dev/null.
    allow_fail: passed through to common.syscall.
    threads:    value for kmc's -sf option. Defaults to 1, which is the
                value that used to be hard-coded, so existing callers are
                unaffected.

    Returns whatever common.syscall returns for the script.
    '''
    f = pyfastaq.utils.open_file_write(script)
    print('set -e', file=f)
    # Option letters are followed directly by their value, e.g. "-k21".
    kmc_command = ''.join([
        'kmc -fa',
        ' -m', str(m_option),
        ' -k', str(kmer),
        ' -sf', str(threads),
        ' -ci', str(min_count),
        ' -cs', str(max_count),
        ' -cx', str(max_count),
        ' ', reads,
        ' kmc_out',
        ' $PWD'
    ])
    # The command is written without a newline so that the redirection can
    # be appended to the same line in the non-verbose case.
    print(kmc_command, end='', file=f)
    if verbose >= 2:
        print('', file=f)
        print('run kmc:', os.getcwd(), kmc_command)
    else:
        print(' > /dev/null', file=f)

    print('kmc_dump', 'kmc_out', 'kmc_out.dump', file=f)
    print('sort -k2nr', 'kmc_out.dump >', outfile, file=f)
    pyfastaq.utils.close(f)
    return common.syscall('bash ' + script, allow_fail=allow_fail)
Ejemplo n.º 5
0
def run_nucmer(query, ref, outfile, min_id=95, min_length=100, breaklen=200):
    '''Run nucmer, delta-filter and show-coords; write coordinates to outfile.

    query, ref: input file names handed to nucmer (ref first, then query).
    outfile:    receives the output of "show-coords -dTlro".
    min_id:     value for delta-filter's -i option.
    min_length: value for delta-filter's -l option.
    breaklen:   value for nucmer's -b option.

    The commands are written to a bash script and run from inside a
    temporary directory created in the current working directory, which is
    why all three paths are made absolute first. The original working
    directory is restored and the temporary directory removed on every exit
    path, including when a command fails.
    '''
    query = os.path.abspath(query)
    ref = os.path.abspath(ref)
    outfile = os.path.abspath(outfile)
    original_dir = os.getcwd()
    tmpdir = tempfile.mkdtemp(prefix='tmp.run_nucmer.', dir=original_dir)
    try:
        os.chdir(tmpdir)
        script = 'run_nucmer.sh'
        f = pyfastaq.utils.open_file_write(script)
        try:
            print('nucmer --maxmatch -p p -b', breaklen, ref, query, file=f)
            print('delta-filter -i', min_id, '-l', min_length, 'p.delta > p.delta.filter', file=f)
            print('show-coords -dTlro p.delta.filter >', outfile, file=f)
        finally:
            pyfastaq.utils.close(f)
        common.syscall('bash ' + script)
    finally:
        # Never leave the caller stranded in (or littered with) the temp dir.
        os.chdir(original_dir)
        shutil.rmtree(tmpdir)
Ejemplo n.º 6
0
def run_nucmer(query, ref, outfile, min_id=95, min_length=100, breaklen=200):
    '''Align query to ref with nucmer and write filtered coordinates.

    A bash script is generated that runs, in order: nucmer (with --maxmatch
    and -b breaklen), delta-filter (with -i min_id and -l min_length) and
    show-coords (with -dTlro), whose output is redirected to outfile.

    The script runs inside a temporary directory made in the current working
    directory, so query, ref and outfile are converted to absolute paths
    before changing directory. The working directory is always restored and
    the temporary directory always deleted, even if a command fails.
    '''
    query = os.path.abspath(query)
    ref = os.path.abspath(ref)
    outfile = os.path.abspath(outfile)
    original_dir = os.getcwd()
    tmpdir = tempfile.mkdtemp(prefix='tmp.run_nucmer.', dir=original_dir)
    try:
        os.chdir(tmpdir)
        script = 'run_nucmer.sh'
        f = pyfastaq.utils.open_file_write(script)
        try:
            print('nucmer --maxmatch -p p -b', breaklen, ref, query, file=f)
            print('delta-filter -i', min_id, '-l', min_length, 'p.delta > p.delta.filter', file=f)
            print('show-coords -dTlro p.delta.filter >', outfile, file=f)
        finally:
            # Close the script handle even if writing it failed.
            pyfastaq.utils.close(f)
        common.syscall('bash ' + script)
    finally:
        # Undo the chdir and remove scratch files on success and on failure.
        os.chdir(original_dir)
        shutil.rmtree(tmpdir)
Ejemplo n.º 7
0
def run_trimmomatic(reads1, reads2, outprefix, trimmo_jar, adapters, minlen=50, verbose=0, threads=1, qual_trim=''):
    '''Run Trimmomatic in paired-end (PE) mode on reads1 and reads2.

    The paired outputs are written to outprefix + '_1.fq' and
    outprefix + '_2.fq'. The unpaired outputs go to
    outprefix + '.unpaired_1.fq' and outprefix + '.unpaired_2.fq' and are
    deleted once the command has run.

    trimmo_jar: jar file given to "java -jar".
    adapters:   adapters file; its absolute path is used in ILLUMINACLIP.
    minlen:     value for the MINLEN step.
    threads:    value for the -threads option.
    qual_trim:  text inserted verbatim between the ILLUMINACLIP and MINLEN
                steps (empty by default).
    verbose:    if true, the command line is printed before running it.
    '''
    unpaired_files = [outprefix + '.unpaired_1.fq', outprefix + '.unpaired_2.fq']
    clip_step = 'ILLUMINACLIP:' + os.path.abspath(adapters) + ':2:10:7:1'

    args = ['java', '-Xmx1000m', '-jar', trimmo_jar, 'PE', '-threads', str(threads)]
    args += [reads1, reads2]
    args += [outprefix + '_1.fq', unpaired_files[0], outprefix + '_2.fq', unpaired_files[1]]
    # qual_trim is kept as its own element even when empty, so the spacing
    # of the resulting command line is unchanged.
    args += [clip_step, qual_trim, 'MINLEN:' + str(minlen)]
    cmd = ' '.join(args)

    if verbose:
        print('Run trimmomatic:', cmd)
    common.syscall(cmd)

    for fname in unpaired_files:
        os.unlink(fname)
Ejemplo n.º 8
0
def run_trimmomatic(reads1,
                    reads2,
                    outprefix,
                    trimmo_jar,
                    adapters,
                    minlen=50,
                    verbose=0,
                    threads=1,
                    qual_trim=''):
    '''Trim a pair of read files with Trimmomatic (PE mode).

    Builds and runs a "java -Xmx1000m -jar trimmo_jar PE ..." command using
    threads for -threads, an ILLUMINACLIP step that points at the absolute
    path of adapters, the optional qual_trim step text, and MINLEN:minlen.

    Paired reads end up in outprefix_1.fq and outprefix_2.fq. The two
    unpaired output files (outprefix.unpaired_1.fq and
    outprefix.unpaired_2.fq) are removed after the command has run.
    If verbose is true the command is printed before it is executed.
    '''
    mates = ('1', '2')
    paired = [outprefix + '_' + mate + '.fq' for mate in mates]
    unpaired = [outprefix + '.unpaired_' + mate + '.fq' for mate in mates]
    illuminaclip = 'ILLUMINACLIP:' + os.path.abspath(adapters) + ':2:10:7:1'

    cmd = ' '.join([
        'java -Xmx1000m -jar',
        trimmo_jar,
        'PE',
        '-threads',
        str(threads),
        reads1,
        reads2,
        paired[0],
        unpaired[0],
        paired[1],
        unpaired[1],
        illuminaclip,
        qual_trim,
        'MINLEN:' + str(minlen),
    ])

    if verbose:
        print('Run trimmomatic:', cmd)
    common.syscall(cmd)

    # Only the paired outputs are kept.
    for leftover in unpaired:
        os.unlink(leftover)
Ejemplo n.º 9
0
Archivo: mapping.py Proyecto: satta/iva
def map_reads(reads_fwd, reads_rev, ref_fa, out_prefix, index_k=15, index_s=3, threads=1, max_insert=1000, minid=0.5, verbose=0, required_flag=None, sort=False, exclude_flag=None, mate_ref=None, extra_smalt_map_ops=None):
    '''Map reads to ref_fa with smalt and write a BAM file.

    Steps, each run as a shell command through common.syscall:
      1. "smalt index" builds an index called out_prefix + '.map_index'
         from ref_fa, using index_k for -k and index_s for -s.
      2. "smalt map" maps the reads. If reads_rev is None only reads_fwd is
         mapped; otherwise both files are given and max_insert is passed
         as -i. minid is passed as -y. Its output is piped through
         "samtools view -bS" into the BAM file.
      3. If sort is true, the BAM from step 2 is written to
         out_prefix + '.unsorted.bam', then "samtools sort" and
         "samtools index" are run. That intermediate file is not deleted
         by this function.
    Finally the smalt index files (.smi and .sma) are deleted.

    threads:             if > 1, "-n threads -O" is added to smalt map. When
                         sorting, at most 4 (and at least 1) threads are
                         used, sharing roughly 500M of memory.
    required_flag:       if not None, added as "samtools view -f".
    exclude_flag:        if not None, added as "samtools view -F".
    mate_ref:            if not None, smalt's output is filtered with awk to
                         lines whose 7th column equals this name.
    extra_smalt_map_ops: extra text inserted after "smalt map".
    verbose:             if >= 2, each command is printed before it is run.
    '''
    if extra_smalt_map_ops is None:
        extra_smalt_map_ops = ''
    map_index = out_prefix + '.map_index'
    clean_files = [map_index + '.' + x for x in ['smi', 'sma']]
    index_cmd = ' '.join([
        'smalt index',
        '-k', str(index_k),
        '-s', str(index_s),
        map_index,
        ref_fa
    ])

    map_cmd = 'smalt map ' + extra_smalt_map_ops + ' '

    # depending on OS, -n can break smalt, so only use -n if it's > 1.
    if threads > 1:
        map_cmd += '-n ' + str(threads) + ' -O '

    if reads_rev is None:
        map_cmd += ' '.join([
            '-y', str(minid),
            map_index,
            reads_fwd,
        ])
    else:
        map_cmd += ' '.join([
            '-i', str(max_insert),
            '-y', str(minid),
            map_index,
            reads_fwd,
            reads_rev,
        ])

    if mate_ref is not None:
        map_cmd += r''' | awk '$7=="''' + mate_ref + '"\''

    map_cmd += ' | samtools view'

    if required_flag is not None:
        map_cmd += ' -f ' + str(required_flag)

    if exclude_flag is not None:
        map_cmd += ' -F ' + str(exclude_flag)

    final_bam = out_prefix + '.bam'
    if sort:
        intermediate_bam = out_prefix + '.unsorted.bam'
    else:
        intermediate_bam = final_bam

    map_cmd += ' -bS -T ' + ref_fa + '  - > ' + intermediate_bam

    # Echo the commands before running them, as the sort/index branch below
    # does, so that a failing command is visible in the verbose output.
    if verbose >= 2:
        print('        map reads. Index:  ', index_cmd)
        print('        map reads. Mapping:', map_cmd)
    common.syscall(index_cmd)
    common.syscall(map_cmd)

    if sort:
        # Clamp to 1..4 so a threads value below 1 cannot divide by zero.
        sort_threads = max(1, min(4, threads))
        thread_mem = int(500 / sort_threads)
        sort_cmd = 'samtools sort -@' + str(sort_threads) + ' -m ' + str(thread_mem) + 'M ' + intermediate_bam + ' ' + out_prefix
        index_cmd = 'samtools index ' + final_bam
        if verbose >= 2:
            print('        map reads. sort:  ', sort_cmd)
        common.syscall(sort_cmd)
        if verbose >= 2:
            print('        map reads. index:  ', index_cmd)
        common.syscall(index_cmd)
    for fname in clean_files:
        os.unlink(fname)