Exemple #1
0
def map_with_bowtie2(index_fpath, bam_fpath, paired_fpaths=None,
                     unpaired_fpaths=None, readgroup=None, threads=None,
                     log_fpath=None, preset='very-sensitive-local',
                     extra_params=None):
    '''It maps with bowtie2 and writes the result into a bam file.

    index_fpath is the bowtie2 index given to -x
    bam_fpath is the bam file that samtools view writes
    paired_fpaths is a list of tuples, in which each tuple are paired seqs
    unpaired_fpaths is a list of files
    readgroup is a dict with the ID, LB, SM and PL tags, it is only used
    when it has an ID
    threads is handed over to get_num_threads to fill -p
    log_fpath is the file where the stderr of bowtie2 and samtools is
    written, a temporary file is used when it is not given
    preset is the name of a bowtie2 preset without the leading dashes
    extra_params is a list of extra items for the bowtie2 command line

    A RuntimeError is raised when there are no files to map, when a
    readgroup tag is not valid and when bowtie2 or samtools exit with a
    non-zero status.
    '''
    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    # empty lists are as useless as None: bowtie2 would get no reads
    if not paired_fpaths and not unpaired_fpaths:
        raise RuntimeError('At least one file to map is required')

    binary = get_binary_path('bowtie2')
    cmd = [binary, '-x', index_fpath, '--{0}'.format(preset),
           '-p', str(get_num_threads(threads))]

    cmd.extend(extra_params)
    if unpaired_fpaths:
        cmd.extend(['-U', ','.join(unpaired_fpaths)])
    if paired_fpaths:
        plus = [pairs[0] for pairs in paired_fpaths]
        minus = [pairs[1] for pairs in paired_fpaths]
        cmd.extend(['-1', ','.join(plus), '-2', ','.join(minus)])

    # the readgroup is only added when it has an ID
    if 'ID' in readgroup:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            if key == 'ID':
                cmd.extend(['--rg-id', value])
            else:
                cmd.extend(['--rg', '{0}:{1}'.format(key, value)])

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bowtie2 = popen(cmd, stderr=stderr, stdout=PIPE)
        cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
               '-o', bam_fpath]
        samtools = popen(cmd, stdin=bowtie2.stdout, stderr=stderr)
        # Allow bowtie2 to receive a SIGPIPE if samtools exits.
        bowtie2.stdout.close()
        samtools.communicate()
        # bowtie2 has to be waited for, otherwise its returncode is not set
        bowtie2.wait()
        if bowtie2.returncode or samtools.returncode:
            with open(stderr.name) as log:
                raise RuntimeError(log.read())
    finally:
        # the log is closed (and removed if temporary) in any case
        stderr.close()
Exemple #2
0
def map_with_bwasw(index_fpath, bam_fpath, unpaired_fpath=None,
                    paired_fpaths=None, readgroup=None, threads=None,
                    log_fpath=None, extra_params=None):
    '''It maps with the bwa bwasw algorithm and writes a bam file.

    index_fpath is the bwa index
    bam_fpath is the bam file to create
    unpaired_fpath is one file with reads, paired_fpaths are the files of
    a pair. Only one of them can be given.
    readgroup is a dict with the ID, LB, PL, SM and PU tags. When it is
    given the bam is written by picard AddOrReplaceReadGroups, otherwise
    by samtools view.
    threads is handed over to get_num_threads to fill -t
    log_fpath is the file where the stderr of both processes is written,
    a temporary file is used when it is not given
    extra_params is a list of extra items for the bwa command line

    A RuntimeError is raised when the input files are missing or mixed
    and when any of the two processes exits with a non-zero status.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    elif paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'bwasw', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)

    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bwa = popen(cmd, stderr=stderr, stdout=PIPE)

        # add readgroup using picard
        picard_tools = get_setting("PICARD_TOOLS_DIR")
        if readgroup:
            cmd = ['java', '-jar',
                   os.path.join(picard_tools, 'AddOrReplaceReadGroups.jar'),
                   'INPUT=/dev/stdin', 'OUTPUT={0}'.format(bam_fpath),
                   'RGID={0}'.format(readgroup['ID']),
                   'RGLB={0}'.format(readgroup['LB']),
                   'RGPL={0}'.format(readgroup['PL']),
                   'RGSM={0}'.format(readgroup['SM']),
                   'RGPU={0}'.format(readgroup['PU']),
                   'VALIDATION_STRINGENCY=LENIENT']
        else:
            cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
                   '-o', bam_fpath]

        to_bam = popen(cmd, stdin=bwa.stdout, stderr=stderr)
        # Allow bwa to receive a SIGPIPE if the bam writer exits.
        bwa.stdout.close()
        to_bam.communicate()
        # without waiting for bwa its returncode would still be None and a
        # failed mapping would go unnoticed
        bwa.wait()
        if bwa.returncode or to_bam.returncode:
            with open(stderr.name) as log:
                raise RuntimeError(log.read())
    finally:
        stderr.close()
Exemple #3
0
def _makeblastdb_plus(seq_fpath, dbtype, outputdb=None):
    '''It runs makeblastdb on the given sequence file.

    dbtype is given to -dbtype. outputdb, when given, is passed with -out.
    The process is handed over to check_process_finishes.
    '''
    makeblastdb = get_binary_path('makeblastdb')
    out_args = [] if outputdb is None else ['-out', outputdb]
    cmd = [makeblastdb, '-in', seq_fpath, '-dbtype', dbtype] + out_args
    blastdb_process = popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    check_process_finishes(blastdb_process, binary=makeblastdb)
Exemple #4
0
    def test_add_rg_to_bam(self):
        # the readgroup given to bwa mem should end up in the bam header
        genes_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        fastq_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        index_dir = TemporaryDir()
        bwa_index = get_or_create_bwa_index(genes_fpath, index_dir.name)
        bam = NamedTemporaryFile(suffix='.bam')
        library = 'aa'
        log = NamedTemporaryFile()
        rg_tags = {'ID': library, 'PL': 'illumina', 'LB': library,
                   'SM': '{0}_illumina_pe'.format(library), 'PU': '0'}

        mapper = map_with_bwamem(bwa_index, unpaired_fpath=fastq_fpath,
                                 readgroup=rg_tags, log_fpath=log.name)
        map_process_to_bam(mapper, bam.name)

        # the header is included (-h) to be able to look for the @RG line
        view_cmd = [get_binary_path('samtools'), 'view', '-h', bam.name]
        sam = subprocess.check_output(view_cmd, stderr=log)
        assert '@RG\tID:aa' in sam
        assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in sam

        index_dir.close()
Exemple #5
0
def map_with_bwamem(index_fpath, unpaired_fpath=None, paired_fpaths=None,
                   threads=None, log_fpath=None, extra_params=None):
    '''It launches bwa with the mem algorithm and returns the process.

    index_fpath is the bwa index
    unpaired_fpath is one file with reads, paired_fpaths are the files of
    a pair. Only one of them can be given.
    threads is handed over to get_num_threads to fill -t
    log_fpath is the file where the bwa stderr is written, a temporary
    file is used when it is not given
    extra_params is a list of extra items for the bwa command line

    The returned process has its stdout connected to a pipe.
    A RuntimeError is raised when the input files are missing or mixed.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    elif paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'mem', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)

    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')
    bwa = popen(cmd, stderr=stderr, stdout=PIPE)
    return bwa
Exemple #6
0
def _makeblastdb_plus(seq_fpath, dbtype, outputdb=None):
    """It builds a blast database out of seq_fpath with makeblastdb.

    The database type goes to -dbtype and outputdb, if given, to -out.
    check_process_finishes takes care of the launched process.
    """
    binary = get_binary_path("makeblastdb")
    if outputdb is None:
        cmd = [binary, "-in", seq_fpath, "-dbtype", dbtype]
    else:
        cmd = [binary, "-in", seq_fpath, "-dbtype", dbtype, "-out", outputdb]
    makeblastdb = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(makeblastdb, binary=binary)
Exemple #7
0
def _makeblastdb_plus(seq_fpath, dbtype, outputdb=None):
    '''It creates a blast database with the makeblastdb binary.

    seq_fpath is given to -in, dbtype to -dbtype and outputdb, only when it
    is not None, to -out.
    '''
    cmd = [get_binary_path('makeblastdb')]
    cmd += ['-in', seq_fpath]
    cmd += ['-dbtype', dbtype]
    if outputdb is not None:
        cmd += ['-out', outputdb]
    pipes = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE}
    process = popen(cmd, **pipes)
    check_process_finishes(process, binary=cmd[0])
Exemple #8
0
def _create_bwa_index(index_fpath):
    '''It creates the bwa index of the given fasta file.

    The index algorithm depends on the number of sequences found in the
    file: bwtsw is used for more than 10000 sequences, is otherwise.
    '''
    binary = get_binary_path('bwa')
    # how many sequences do we have? Every line that starts with > is
    # counted. The count, and not the list of lines, has to be compared
    # with the threshold.
    with open(index_fpath) as fhand:
        n_seqs = sum(1 for line in fhand if line.startswith('>'))
    algorithm = 'bwtsw' if n_seqs > 10000 else 'is'
    cmd = [binary, 'index', '-a', algorithm, index_fpath]
    process = popen(cmd, stdout=PIPE, stderr=PIPE)
    check_process_finishes(process, binary=cmd[0])
Exemple #9
0
def get_genome_coverage(bam_fhands):
    '''It adds up the genome coverage histograms of the given bam files.

    bedtools genomecov is run on every bam. Only the output lines that
    start with genome are used: the second field is the key of the
    histogram and the third field is added to its count.
    It returns an IntCounter.
    '''
    coverage_hist = IntCounter()
    for bam_fhand in bam_fhands:
        bam_fpath = bam_fhand.name
        cmd = [get_binary_path('bedtools'), 'genomecov', '-ibam', bam_fpath]
        cover_process = Popen(cmd, stdout=PIPE)
        try:
            for line in cover_process.stdout:
                if line.startswith('genome'):
                    cov, value = line.split('\t')[1:3]
                    coverage_hist[int(cov)] += int(value)
        finally:
            # close the pipe and wait for bedtools so no process or file
            # descriptor is left behind for every bam
            cover_process.stdout.close()
            cover_process.wait()
    return coverage_hist
Exemple #10
0
def get_genome_coverage(bam_fhands):
    '''It returns the coverage histogram of all the given bam files.

    Every bam is processed with bedtools genomecov and the lines that
    start with genome are summed into one IntCounter: the second field is
    the key and the third field the amount added to it.
    '''
    coverage_hist = IntCounter()
    for bam_fhand in bam_fhands:
        bam_fpath = bam_fhand.name
        cmd = [get_binary_path('bedtools'), 'genomecov', '-ibam', bam_fpath]
        cover_process = Popen(cmd, stdout=PIPE)
        try:
            for line in cover_process.stdout:
                if not line.startswith('genome'):
                    continue
                cov, value = line.split('\t')[1: 3]
                coverage_hist[int(cov)] += int(value)
        finally:
            # the pipe is closed and the process waited for, otherwise a
            # bedtools process per bam would be left unreaped
            cover_process.stdout.close()
            cover_process.wait()
    return coverage_hist
Exemple #11
0
def _run_estscan(seqs, pep_out_fpath, dna_out_fpath, matrix_fpath):
    '''It runs estscan in the input seqs.

    The seqs are written in fasta format with write_seqs and the resulting
    file is given to estscan. pep_out_fpath is passed with -t,
    dna_out_fpath with -o and matrix_fpath with -M.
    '''
    seq_fhand = write_seqs(seqs, file_format='fasta')
    try:
        # estscan reads the file by name, so everything has to be on disk
        seq_fhand.flush()
        binary = get_binary_path('estscan')

        cmd = [binary, '-t', pep_out_fpath, '-o', dna_out_fpath, '-M',
               matrix_fpath, seq_fhand.name]
        process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        check_process_finishes(process, binary=cmd[0])
    finally:
        # the sequence file is closed even if estscan fails
        seq_fhand.close()
Exemple #12
0
def _run_estscan(seqs, pep_out_fpath, dna_out_fpath, matrix_fpath):
    '''It runs estscan in the input seqs.

    write_seqs creates a fasta file with the seqs and estscan is run on
    it with -t pep_out_fpath, -o dna_out_fpath and -M matrix_fpath.
    '''
    seq_fhand = write_seqs(seqs, file_format='fasta')
    try:
        # the file is read by estscan using its name: flush it first
        seq_fhand.flush()
        binary = get_binary_path('estscan')

        cmd = [binary, '-t', pep_out_fpath, '-o', dna_out_fpath, '-M',
               matrix_fpath, seq_fhand.name]
        process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        check_process_finishes(process, binary=cmd[0])
    finally:
        # do not leave the sequence file open when something goes wrong
        seq_fhand.close()
Exemple #13
0
def get_or_create_bowtie2_index(fpath, directory=None):
    '''It returns the bowtie2 index path, building the index if required.

    When a directory is given the index is placed inside it using the
    basename of fpath, otherwise fpath itself is the index path.
    bowtie2-build is only run when _bowtie2_index_exists does not find
    the index.
    '''
    builder = get_binary_path('bowtie2-build')
    index_fpath = fpath
    if directory is not None:
        index_fpath = os.path.join(directory, os.path.basename(fpath))
    if _bowtie2_index_exists(index_fpath):
        return index_fpath
    cmd = [builder, '-f', fpath, index_fpath]
    build_process = popen(cmd, stdout=PIPE, stderr=PIPE)
    check_process_finishes(build_process, binary=builder)
    return index_fpath
Exemple #14
0
def map_process_to_bam(map_process, bam_fpath, log_fpath=None):
    '''It writes the stdout of a mapping process into a bam file.

    map_process is a running process whose stdout is piped into
    samtools view, that writes bam_fpath.
    log_fpath is the file where the samtools stderr is written, a
    temporary file is used when it is not given.
    '''
    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
               '-o', bam_fpath]

        samtools = popen(cmd, stdin=map_process.stdout, stderr=stderr)
        # Allow the mapper to receive a SIGPIPE if samtools exits.
        map_process.stdout.close()
        samtools.communicate()
        # reap the mapper: its returncode gets set for the caller and no
        # defunct process is left behind
        map_process.wait()
    finally:
        stderr.close()
    def test_map_with_bwa(self):
        # a known read should be found in the bam created by bwasw
        genes_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        fastq_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        index_dir = TemporaryDir()
        bwa_index = get_or_create_bwa_index(genes_fpath, index_dir.name)
        bam = NamedTemporaryFile(suffix='.bam')
        map_with_bwasw(bwa_index, bam.name, unpaired_fpath=fastq_fpath)
        view_cmd = [get_binary_path('samtools'), 'view', bam.name]
        sam = subprocess.check_output(view_cmd)
        assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in sam

        index_dir.close()
    def test_map_with_bwa(self):
        # maps the test reads with bwasw and looks for one of them in the bam
        data_fpaths = [os.path.join(TEST_DATA_DIR, fname) for fname in
                       ('arabidopsis_genes', 'arabidopsis_reads.fastq')]
        reference_fpath, reads_fpath = data_fpaths
        tmp_dir = TemporaryDir()
        index_fpath = get_or_create_bwa_index(reference_fpath, tmp_dir.name)
        out_bam = NamedTemporaryFile(suffix='.bam')
        map_with_bwasw(index_fpath, out_bam.name, unpaired_fpath=reads_fpath)
        samtools = get_binary_path('samtools')
        alignments = subprocess.check_output([samtools, 'view', out_bam.name])
        assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in alignments

        tmp_dir.close()
Exemple #17
0
def map_with_bwamem(index_fpath, unpaired_fpath=None, paired_fpaths=None,
                    interleave_fpath=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None):
    '''It launches bwa with the mem algorithm and returns the process.

    Only one kind of input can be given: unpaired_fpath (one file),
    paired_fpaths (the files of a pair) or interleave_fpath (one file
    with interleaved pairs, that is mapped with -p).
    threads is handed over to get_num_threads to fill -t
    log_fpath is the file where the bwa stderr is written, a temporary
    file is used when it is not given
    extra_params is a list of extra items for the bwa command line. The
    given list is not modified. Any -p found in it is ignored because
    that flag is only set for the interleaved input.
    readgroup is a dict with the ID, SM, PL and LB tags, it is given to
    bwa with -R.

    The returned process has its stdout connected to a pipe.
    A RuntimeError is raised when no input, or more than one kind of
    input, is given.
    '''
    in_fpaths = []
    num_called_fpaths = 0
    if unpaired_fpath is not None:
        num_called_fpaths += 1
        in_fpaths.append(unpaired_fpath)
    if paired_fpaths is not None:
        num_called_fpaths += 1
        in_fpaths.extend(paired_fpaths)
    if interleave_fpath is not None:
        num_called_fpaths += 1
        in_fpaths.append(interleave_fpath)
    interleave = interleave_fpath is not None

    if num_called_fpaths == 0:
        raise RuntimeError('At least one file to map is required')
    if num_called_fpaths > 1:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    # a new list is built, so the list of the caller is left untouched and
    # it can be reused in another call
    if extra_params is None:
        extra_params = []
    extra_params = [param for param in extra_params if param != '-p']

    if interleave:
        extra_params.append('-p')

    if readgroup is not None:
        rg_str = '@RG\tID:{ID}\tSM:{SM}\tPL:{PL}\tLB:{LB}'.format(**readgroup)
        extra_params.extend(['-R', rg_str])

    binary = get_binary_path('bwa')
    cmd = [binary, 'mem', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)
    cmd.extend(in_fpaths)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')
    bwa = popen(cmd, stderr=stderr, stdout=PIPE)
    return bwa
Exemple #18
0
def map_process_to_bam(map_process, bam_fpath, log_fpath=None,
                       tempdir=None):
    ''' It receives a mapping process that has a sam file in stdout and
    calling another external process convert the sam file into a bam file.

    map_process is the running mapper, its stdout is piped into samtools
    view, that writes bam_fpath.
    log_fpath is the file where the samtools stderr is written, a
    temporary file is used when it is not given.
    tempdir is accepted but it is not used.
    '''
    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
               '-o', bam_fpath]

        samtools = popen(cmd, stdin=map_process.stdout, stderr=stderr)
        # Allow the mapper to receive a SIGPIPE if samtools exits.
        map_process.stdout.close()
        samtools.communicate()
        # the mapper is waited for, so it is reaped and its returncode is
        # available to the caller
        map_process.wait()
    finally:
        stderr.close()
Exemple #19
0
def get_reference_counts(bam_fpath):
    '''Using samtools idxstats it generates dictionaries with read counts.

    One dict is yielded per output line with the keys reference, length,
    mapped_reads and unmapped_reads. For the line whose reference is *
    the reference and the length are None.
    '''
    cmd = [get_binary_path('samtools'), 'idxstats', bam_fpath]
    idx_process = Popen(cmd, stdout=PIPE)
    # we're not using pysam.idxstats here because the stdout differed
    # depending on how the tests were run
    try:
        for line in idx_process.stdout:
            ref_name, ref_length, mapped_reads, unmapped_reads = line.split()
            if ref_name == '*':
                ref_name = None
                ref_length = None
            else:
                ref_length = int(ref_length)
            yield {'reference': ref_name, 'length': ref_length,
                   'mapped_reads': int(mapped_reads),
                   'unmapped_reads': int(unmapped_reads)}
    finally:
        # this also runs when the generator is dropped half way: the pipe
        # is closed and samtools is reaped
        idx_process.stdout.close()
        idx_process.wait()
Exemple #20
0
def get_reference_counts(bam_fpath):
    '''Using samtools idxstats it generates dictionaries with read counts.

    Every line of the idxstats output is yielded as a dict with the
    reference, length, mapped_reads and unmapped_reads keys. The line
    with the * reference has None as reference and as length.
    '''
    cmd = [get_binary_path('samtools'), 'idxstats', bam_fpath]
    idx_process = Popen(cmd, stdout=PIPE)
    # we're not using pysam.idxstats here because the stdout differed
    # depending on how the tests were run
    try:
        for line in idx_process.stdout:
            ref_name, ref_length, mapped_reads, unmapped_reads = line.split()
            if ref_name == '*':
                ref_name = None
                ref_length = None
            else:
                ref_length = int(ref_length)
            yield {'reference': ref_name, 'length': ref_length,
                   'mapped_reads': int(mapped_reads),
                   'unmapped_reads': int(unmapped_reads)}
    finally:
        # the samtools process is not left behind, even if the consumer
        # stops iterating before the end
        idx_process.stdout.close()
        idx_process.wait()
Exemple #21
0
def map_with_bowtie2(index_fpath, paired_fpaths=None,
                     unpaired_fpath=None, readgroup=None, threads=None,
                     log_fpath=None, preset='very-sensitive-local',
                     extra_params=None):
    '''It launches bowtie2 and returns the running process.

    index_fpath is the bowtie2 index given to -x
    paired_fpaths holds the two files of a pair, the first one goes to -1
    and the second one to -2
    unpaired_fpath is one file, given to -U
    readgroup is a dict with the ID, LB, SM and PL tags, it is only used
    when it has an ID
    threads is handed over to get_num_threads to fill -p
    log_fpath is the file where the bowtie2 stderr is written, a temporary
    file is used when it is not given
    preset is the name of a bowtie2 preset without the leading dashes
    extra_params is a list of extra items for the bowtie2 command line

    The returned process has its stdout connected to a pipe.
    A RuntimeError is raised when there are no files to map and when a
    readgroup tag is not valid.
    '''
    rg_tags = {} if readgroup is None else readgroup
    extras = [] if extra_params is None else extra_params

    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')

    bowtie2_cmd = [get_binary_path('bowtie2'), '-x', index_fpath]
    bowtie2_cmd.append('--{0}'.format(preset))
    bowtie2_cmd += ['-p', str(get_num_threads(threads))]
    bowtie2_cmd += extras

    if unpaired_fpath:
        bowtie2_cmd += ['-U', unpaired_fpath]
    if paired_fpaths:
        bowtie2_cmd += ['-1', paired_fpaths[0], '-2', paired_fpaths[1]]

    # without an ID the whole readgroup is ignored
    if 'ID' in rg_tags:
        for tag, value in rg_tags.items():
            if tag == 'ID':
                bowtie2_cmd += ['--rg-id', value]
            elif tag in ('LB', 'SM', 'PL'):
                bowtie2_cmd += ['--rg', '{0}:{1}'.format(tag, value)]
            else:
                msg = 'The readgroup header tag is not valid: {}'.format(tag)
                raise RuntimeError(msg)

    if log_fpath is not None:
        log_fhand = open(log_fpath, 'w')
    else:
        log_fhand = NamedTemporaryFile(suffix='.stderr')

    return popen(bowtie2_cmd, stderr=log_fhand, stdout=PIPE)
Exemple #22
0
def do_blast(query_fpath, db_fpath, program, out_fpath, params=None):
    """It runs a blast program and writes the result in out_fpath.

    params is a dict that can hold evalue (0.001 by default), task
    (megablast by default) and outfmt (5 by default).
    A ValueError is raised when the program is not one of the blast
    programs. The task is checked with an assert.
    """
    valid_tasks = ("blastn", "blastn-short", "dc-megablast", "megablast", "rmblastn")
    valid_programs = ("blastn", "blastp", "blastx", "tblastx", "tblastn")

    options = params or {}
    evalue = options.get("evalue", 0.001)
    task = options.get("task", "megablast")
    outfmt = str(options.get("outfmt", 5))
    assert task in valid_tasks

    if program not in valid_programs:
        raise ValueError("The given program is invalid: " + str(program))

    blast_binary = get_binary_path(program)
    cmd = [
        blast_binary,
        "-query", query_fpath,
        "-db", db_fpath,
        "-out", out_fpath,
        "-evalue", str(evalue),
        "-task", task,
        "-outfmt", outfmt,
    ]
    blast_process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(blast_process, binary=blast_binary)
Exemple #23
0
    def test_add_rg_to_bam(self):
        # bwa mem is given a readgroup that has to be found in the bam header
        data_fpaths = [os.path.join(TEST_DATA_DIR, fname) for fname in
                       ('arabidopsis_genes', 'arabidopsis_reads.fastq')]
        reference_fpath, reads_fpath = data_fpaths
        tmp_dir = TemporaryDir()
        index_fpath = get_or_create_bwa_index(reference_fpath, tmp_dir.name)
        out_bam = NamedTemporaryFile(suffix='.bam')
        lib = 'aa'
        log_fhand = NamedTemporaryFile()
        readgroup = {
            'ID': lib,
            'PL': 'illumina',
            'LB': lib,
            'SM': '{0}_illumina_pe'.format(lib),
            'PU': '0',
        }
        bwa_process = map_with_bwamem(index_fpath,
                                      unpaired_fpath=reads_fpath,
                                      readgroup=readgroup,
                                      log_fpath=log_fhand.name)
        map_process_to_bam(bwa_process, out_bam.name)
        samtools = get_binary_path('samtools')
        # -h asks samtools for the header, where the @RG line lives
        sam_text = subprocess.check_output(
            [samtools, 'view', '-h', out_bam.name], stderr=log_fhand)
        assert '@RG\tID:aa' in sam_text
        assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in sam_text

        tmp_dir.close()
Exemple #24
0
def map_with_hisat2(index_fpath, paired_fpaths=None,
                    unpaired_fpath=None, readgroup=None, threads=None,
                    log_fhand=None, extra_params=None):
    '''It launches hisat2 and returns the running process.

    index_fpath is the hisat2 index given to -x
    paired_fpaths holds the two files of a pair, the first one goes to -1
    and the second one to -2
    unpaired_fpath is one file, given to -U
    readgroup is a dict with the ID, LB, SM and PL tags, it is only used
    when it has an ID
    threads is handed over to get_num_threads to fill -p
    log_fhand is used as the stderr of the process
    extra_params is a list of extra items for the hisat2 command line

    The returned process has its stdout connected to a pipe.
    A RuntimeError is raised when there are no files to map and when a
    readgroup tag is not valid.
    '''
    rg_tags = {} if readgroup is None else readgroup
    extras = [] if extra_params is None else extra_params

    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')

    hisat2_cmd = [get_binary_path('hisat2'), '-x', index_fpath, '-p',
                  str(get_num_threads(threads))]
    hisat2_cmd += extras

    if unpaired_fpath:
        hisat2_cmd += ['-U', unpaired_fpath]
    if paired_fpaths:
        hisat2_cmd += ['-1', paired_fpaths[0], '-2', paired_fpaths[1]]

    # without an ID the whole readgroup is ignored
    if 'ID' in rg_tags:
        for tag, value in rg_tags.items():
            if tag == 'ID':
                hisat2_cmd += ['--rg-id', value]
            elif tag in ('LB', 'SM', 'PL'):
                hisat2_cmd += ['--rg', '{0}:{1}'.format(tag, value)]
            else:
                msg = 'The readgroup header tag is not valid: {}'.format(tag)
                raise RuntimeError(msg)

    return popen(hisat2_cmd, stderr=log_fhand, stdout=PIPE)
Exemple #25
0
def _do_blast_local(query_fpath, db_fpath, program, out_fpath, params=None):
    """It runs a local blast and writes the result in out_fpath.

    params is a dict with blast options. The evalue and the task are
    taken from it by _parse_blast_params, outfmt is 5 by default, and
    whatever is left in the dict is added to the command line as
    -key value. The given dict is not modified.
    A ValueError is raised when the program is not one of the blast
    programs.
    """
    # a copy is used because the outfmt is removed from the dict and the
    # caller could be using the same params for more than one blast
    params = dict(params) if params else {}
    evalue, task = _parse_blast_params(params, program)

    outfmt = params.pop("outfmt", 5)

    if program not in ("blastn", "blastp", "blastx", "tblastx", "tblastn"):
        raise ValueError("The given program is invalid: " + str(program))
    binary = get_binary_path(program)
    cmd = [binary, "-query", query_fpath, "-db", db_fpath, "-out", out_fpath]
    cmd.extend(["-evalue", str(evalue), "-outfmt", str(outfmt)])
    if task:
        cmd.extend(["-task", task])
    # items is available in every python version, viewitems is not
    for key, value in params.items():
        cmd.extend(("-" + key, str(value)))
    process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(process, binary=cmd[0])
Exemple #26
0
def _do_blast_local(query_fpath, db_fpath, program, out_fpath, params=None):
    '''It runs a local blast and writes the result in out_fpath.

    params is a dict with blast options. The evalue and the task are
    taken from it by _parse_blast_params, outfmt is 5 by default, and
    whatever is left in the dict is added to the command line as
    -key value. The given dict is not modified.
    A ValueError is raised when the program is not one of the blast
    programs.
    '''
    # a copy is used because the outfmt is removed from the dict and the
    # caller could be using the same params for more than one blast
    params = dict(params) if params else {}
    evalue, task = _parse_blast_params(params, program)

    outfmt = params.pop('outfmt', 5)

    if program not in ('blastn', 'blastp', 'blastx', 'tblastx', 'tblastn'):
        raise ValueError('The given program is invalid: ' + str(program))
    binary = get_binary_path(program)
    cmd = [binary, '-query', query_fpath, '-db', db_fpath, '-out', out_fpath]
    cmd.extend(['-evalue', str(evalue), '-outfmt', str(outfmt)])
    if task:
        cmd.extend(['-task', task])
    # items is available in every python version, viewitems is not
    for key, value in params.items():
        cmd.extend(('-' + key, str(value)))
    process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(process, binary=cmd[0])
Exemple #27
0
def _do_blast_local(query_fpath, db_fpath, program, out_fpath, params=None):
    '''It does a blast with a local binary.

    The evalue and the task are obtained from params with
    _parse_blast_params. The outfmt is taken from params too, 5 is used
    when it is missing. The rest of the params are added to the command
    line as -key value.
    The params dict of the caller is left untouched.
    A ValueError is raised when the program is not a known blast program.
    '''
    # the outfmt is removed from the params, so a copy is modified
    # instead of the dict that belongs to the caller
    params = dict(params) if params else {}
    evalue, task = _parse_blast_params(params, program)

    outfmt = params.pop('outfmt', 5)

    if program not in ('blastn', 'blastp', 'blastx', 'tblastx', 'tblastn'):
        raise ValueError('The given program is invalid: ' + str(program))
    binary = get_binary_path(program)
    cmd = [binary, '-query', query_fpath, '-db', db_fpath, '-out', out_fpath]
    cmd.extend(['-evalue', str(evalue), '-outfmt', str(outfmt)])
    if task:
        cmd.extend(['-task', task])
    # viewitems only exists in python 2 dicts, items works everywhere
    for key, value in params.items():
        cmd.extend(('-' + key, str(value)))
    process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(process, binary=cmd[0])
Exemple #28
0
def map_with_bowtie2(index_fpath,
                     bam_fpath,
                     paired_fpaths=None,
                     unpaired_fpaths=None,
                     readgroup=None,
                     threads=None,
                     log_fpath=None,
                     preset='very-sensitive-local',
                     extra_params=None):
    '''It maps with bowtie2 and writes the result into a bam file.

    index_fpath is the bowtie2 index given to -x
    bam_fpath is the bam file that samtools view writes
    paired_fpaths is a list of tuples, in which each tuple are paired seqs
    unpaired_fpaths is a list of files
    readgroup is a dict with the ID, LB, SM and PL tags, it is only used
    when it has an ID
    threads is handed over to get_num_threads to fill -p
    log_fpath is the file where the stderr of bowtie2 and samtools is
    written, a temporary file is used when it is not given
    preset is the name of a bowtie2 preset without the leading dashes
    extra_params is a list of extra items for the bowtie2 command line

    A RuntimeError is raised when there are no files to map, when a
    readgroup tag is not valid and when bowtie2 or samtools exit with a
    non-zero status.
    '''
    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    # an empty list has no reads to map either, so it is rejected like None
    if not paired_fpaths and not unpaired_fpaths:
        raise RuntimeError('At least one file to map is required')

    binary = get_binary_path('bowtie2')
    cmd = [
        binary, '-x', index_fpath, '--{0}'.format(preset), '-p',
        str(get_num_threads(threads))
    ]

    cmd.extend(extra_params)
    if unpaired_fpaths:
        cmd.extend(['-U', ','.join(unpaired_fpaths)])
    if paired_fpaths:
        plus = [pairs[0] for pairs in paired_fpaths]
        minus = [pairs[1] for pairs in paired_fpaths]
        cmd.extend(['-1', ','.join(plus), '-2', ','.join(minus)])

    # the readgroup is only added when it has an ID
    if 'ID' in readgroup:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            if key == 'ID':
                cmd.extend(['--rg-id', value])
            else:
                cmd.extend(['--rg', '{0}:{1}'.format(key, value)])

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bowtie2 = popen(cmd, stderr=stderr, stdout=PIPE)
        cmd = [
            get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-', '-o',
            bam_fpath
        ]

        samtools = popen(cmd, stdin=bowtie2.stdout, stderr=stderr)
        # Allow bowtie2 to receive a SIGPIPE if samtools exits.
        bowtie2.stdout.close()
        samtools.communicate()
        # the returncode of bowtie2 is only set once it has been waited for
        bowtie2.wait()
        if bowtie2.returncode or samtools.returncode:
            with open(stderr.name) as log:
                raise RuntimeError(log.read())
    finally:
        # the log is closed (and removed if temporary) in any case
        stderr.close()
Exemple #29
0
def map_with_bwasw(index_fpath,
                   bam_fpath,
                   unpaired_fpath=None,
                   paired_fpaths=None,
                   readgroup=None,
                   threads=None,
                   log_fpath=None,
                   extra_params=None):
    '''It maps with the bwa bwasw algorithm and writes a bam file.

    index_fpath is the bwa index
    bam_fpath is the bam file to create
    unpaired_fpath is one file with reads, paired_fpaths are the files of
    a pair. Only one of them can be given.
    readgroup is a dict with the ID, LB, PL, SM and PU tags. When it is
    given the bam is written by picard AddOrReplaceReadGroups, otherwise
    by samtools view.
    threads is handed over to get_num_threads to fill -t
    log_fpath is the file where the stderr of both processes is written,
    a temporary file is used when it is not given
    extra_params is a list of extra items for the bwa command line

    A RuntimeError is raised when the input files are missing or mixed
    and when any of the two processes exits with a non-zero status.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    elif paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'bwasw', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)

    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bwa = popen(cmd, stderr=stderr, stdout=PIPE)

        # add readgroup using picard
        picard_tools = get_setting("PICARD_TOOLS_DIR")
        if readgroup:
            cmd = [
                'java', '-jar',
                os.path.join(picard_tools, 'AddOrReplaceReadGroups.jar'),
                'INPUT=/dev/stdin',
                'OUTPUT={0}'.format(bam_fpath),
                'RGID={0}'.format(readgroup['ID']),
                'RGLB={0}'.format(readgroup['LB']),
                'RGPL={0}'.format(readgroup['PL']),
                'RGSM={0}'.format(readgroup['SM']),
                'RGPU={0}'.format(readgroup['PU']),
                'VALIDATION_STRINGENCY=LENIENT'
            ]
        else:
            cmd = [
                get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
                '-o', bam_fpath
            ]

        to_bam = popen(cmd, stdin=bwa.stdout, stderr=stderr)
        # Allow bwa to receive a SIGPIPE if the bam writer exits.
        bwa.stdout.close()
        to_bam.communicate()
        # bwa has to be waited for: until then its returncode is None and
        # a failed mapping would not be detected
        bwa.wait()
        if bwa.returncode or to_bam.returncode:
            with open(stderr.name) as log:
                raise RuntimeError(log.read())
    finally:
        stderr.close()
Exemple #30
0
def index_bam(bam_fpath):
    '''It indexes a bam file running samtools index.

    subprocess.check_call raises a CalledProcessError if samtools fails.
    '''
    index_cmd = [get_binary_path('samtools'), 'index', bam_fpath]
    subprocess.check_call(index_cmd)
Exemple #31
0
def index_bam(bam_fpath):
    '''It creates the index of the given bam with samtools index.

    A failing samtools makes subprocess.check_call raise a
    CalledProcessError.
    '''
    samtools = get_binary_path('samtools')
    cmd = [samtools]
    cmd += ['index', bam_fpath]
    subprocess.check_call(cmd)