Example #1
0
    def test_popen(self):
        'popen fails for a missing binary and works for an existing one'

        # A binary that does not exist has to be reported with a
        # MissingBinaryError, finishing without it is a test failure.
        try:
            popen(['bad_binary'])
        except MissingBinaryError:
            pass
        else:
            self.fail()

        # With an existing binary the call should not raise
        popen(['ls'], stdout=PIPE)
Example #2
0
    def test_popen(self):
        "It checks that popen creates processes and detects missing binaries"

        missing_binary_detected = False
        try:
            popen(["bad_binary"])
        except MissingBinaryError:
            missing_binary_detected = True

        # the call above should never get to create a process
        if not missing_binary_detected:
            self.fail()

        popen(["ls"], stdout=PIPE)
Example #3
0
    def test_popen(self):
        'It tests the process creation done by popen'
        missing_cmd = ['bad_binary']
        listing_cmd = ['ls']

        # the first command can not be run, so an error is expected
        try:
            popen(missing_cmd)
        except MissingBinaryError:
            pass
        else:
            self.fail()

        # this one should just run
        popen(listing_cmd, stdout=PIPE)
Example #4
0
def map_with_bowtie2(index_fpath, bam_fpath, paired_fpaths=None,
                     unpaired_fpaths=None, readgroup=None, threads=None,
                     log_fpath=None, preset='very-sensitive-local',
                     extra_params=None):
    '''It maps with bowtie2 and stores the result in a bam file.

    The bowtie2 output is piped into "samtools view", that writes bam_fpath.

    index_fpath: index given to bowtie2 with -x
    bam_fpath: path of the bam file to create
    paired_fpaths: a list of tuples, in which each tuple are paired files
    unpaired_fpaths: a list of files
    readgroup: dict with the read group tags (ID, LB, SM and PL). It is
               ignored unless it has an ID
    threads: value handed to get_num_threads to fill the -p option
    log_fpath: file for the stderr of both processes. If it is not given a
               temporary file is used
    preset: bowtie2 preset, it is added to the command as --preset
    extra_params: list of extra options for bowtie2

    It raises a RuntimeError if there are no files to map or if the
    readgroup has a tag that is not supported.
    '''
    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    # None and empty lists are both rejected, there would be nothing to map
    if not paired_fpaths and not unpaired_fpaths:
        raise RuntimeError('At least one file to map is required')

    binary = get_binary_path('bowtie2')
    cmd = [binary, '-x', index_fpath, '--{0}'.format(preset),
           '-p', str(get_num_threads(threads))]

    cmd.extend(extra_params)
    if unpaired_fpaths:
        cmd.extend(['-U', ','.join(unpaired_fpaths)])
    if paired_fpaths:
        plus = [pairs[0] for pairs in paired_fpaths]
        minus = [pairs[1] for pairs in paired_fpaths]
        cmd.extend(['-1', ','.join(plus), '-2', ','.join(minus)])

    if 'ID' in readgroup:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            if key == 'ID':
                cmd.extend(['--rg-id', value])
            else:
                cmd.extend(['--rg', '{0}:{1}'.format(key, value)])

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bowtie2 = popen(cmd, stderr=stderr, stdout=PIPE)
        cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
               '-o', bam_fpath]

        samtools = popen(cmd, stdin=bowtie2.stdout, stderr=stderr)
        # Allow bowtie2 to receive a SIGPIPE if samtools exits.
        bowtie2.stdout.close()
        samtools.communicate()
        # samtools is done, so bowtie2 has nobody to write to. We wait for
        # it to avoid leaving a process that nobody has collected
        bowtie2.wait()
    finally:
        # the log handle is ours, so it should not outlive the mapping
        stderr.close()
Example #5
0
def map_with_bwasw(index_fpath, bam_fpath, unpaired_fpath=None,
                   paired_fpaths=None, readgroup=None, threads=None,
                   log_fpath=None, extra_params=None):
    '''It maps with bwa bwasw algorithm and it creates a bam file.

    index_fpath: index given to bwa
    bam_fpath: path of the bam file to create
    unpaired_fpath: file with unpaired reads
    paired_fpaths: list with the files of the paired reads
    readgroup: dict with the ID, LB, PL, SM and PU tags. If it is given the
               bam is written by picard AddOrReplaceReadGroups, otherwise
               by samtools view
    threads: value handed to get_num_threads to fill the -t option
    log_fpath: file for the stderr of both processes. If it is not given a
               temporary file is used
    extra_params: list of extra options for bwa

    It raises a RuntimeError if no reads are given, if paired and unpaired
    reads are given at once or if any of the two processes fails. In the
    last case the message is the content of the stderr file.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    if paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'bwasw', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)

    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bwa = popen(cmd, stderr=stderr, stdout=PIPE)

        # add readgroup using picard
        picard_tools = get_setting("PICARD_TOOLS_DIR")
        if readgroup:
            cmd = ['java', '-jar',
                   os.path.join(picard_tools, 'AddOrReplaceReadGroups.jar'),
                   'INPUT=/dev/stdin', 'OUTPUT={0}'.format(bam_fpath),
                   'RGID={0}'.format(readgroup['ID']),
                   'RGLB={0}'.format(readgroup['LB']),
                   'RGPL={0}'.format(readgroup['PL']),
                   'RGSM={0}'.format(readgroup['SM']),
                   'RGPU={0}'.format(readgroup['PU']),
                   'VALIDATION_STRINGENCY=LENIENT']
        else:
            cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S',
                   '-', '-o', bam_fpath]

        samtools = popen(cmd, stdin=bwa.stdout, stderr=stderr)
        # Allow bwa to receive a SIGPIPE if samtools exits.
        bwa.stdout.close()
        samtools.communicate()
        # The returncode of bwa is None until we wait for the process,
        # without this a failure of bwa would go unnoticed
        bwa.wait()
        if bwa.returncode or samtools.returncode:
            # the log has to be read before the temporary file is closed
            with open(stderr.name) as log_fhand:
                raise RuntimeError(log_fhand.read())
    finally:
        stderr.close()
Example #6
0
def _makeblastdb_plus(seq_fpath, dbtype, outputdb=None):
    "It builds a blast database from a sequence file with makeblastdb"
    binary = get_binary_path("makeblastdb")
    # the output name is optional, by default makeblastdb chooses it
    out_args = [] if outputdb is None else ["-out", outputdb]
    cmd = [binary, "-in", seq_fpath, "-dbtype", dbtype] + out_args
    process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(process, binary=binary)
Example #7
0
def _makeblastdb_plus(seq_fpath, dbtype, outputdb=None):
    'It runs makeblastdb to create a blast database of the given type'
    makeblastdb = get_binary_path('makeblastdb')

    cmd = [makeblastdb]
    cmd += ['-in', seq_fpath]
    cmd += ['-dbtype', dbtype]
    if outputdb is not None:
        cmd += ['-out', outputdb]

    # both outputs are captured, the check below is given the process
    pipes = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE}
    blastdb_process = popen(cmd, **pipes)
    check_process_finishes(blastdb_process, binary=makeblastdb)
Example #8
0
def _makeblastdb_plus(seq_fpath, dbtype, outputdb=None):
    'It creates a blast database for the sequences found in seq_fpath'
    cmd = [get_binary_path('makeblastdb')]

    # every option is a (flag, value) pair, -out is only used on demand
    options = [('-in', seq_fpath), ('-dbtype', dbtype)]
    if outputdb is not None:
        options.append(('-out', outputdb))
    for flag, value in options:
        cmd.extend([flag, value])

    process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(process, binary=cmd[0])
Example #9
0
def map_with_bwamem(index_fpath, unpaired_fpath=None, paired_fpaths=None,
                    threads=None, log_fpath=None, extra_params=None):
    '''It maps with bwa mem algorithm

    It returns the process created by popen without waiting for it. The
    process is started with its stdout connected to a pipe.

    index_fpath: index given to bwa
    unpaired_fpath: file with unpaired reads
    paired_fpaths: list with the files of the paired reads
    threads: value handed to get_num_threads to fill the -t option
    log_fpath: file for the stderr of bwa. If it is not given a temporary
               file is used
    extra_params: list of extra options for bwa

    It raises a RuntimeError if no reads are given or if paired and
    unpaired reads are given at once.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    if paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'mem', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)

    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')
    bwa = popen(cmd, stderr=stderr, stdout=PIPE)
    return bwa
Example #10
0
def _create_bwa_index(index_fpath):
    '''It creates the bwa index of the given fasta file.

    The indexing algorithm depends on the number of sequences in the file:
    bwtsw for more than 10000 sequences and is for the rest.
    '''
    binary = get_binary_path('bwa')
    # how many sequences do we have? Every fasta header starts with >
    # We need the count, a list compared with a number gives no valid answer
    with open(index_fpath) as fhand:
        n_seqs = sum(1 for line in fhand if line.startswith('>'))
    algorithm = 'bwtsw' if n_seqs > 10000 else 'is'
    cmd = [binary, 'index', '-a', algorithm, index_fpath]
    process = popen(cmd, stdout=PIPE, stderr=PIPE)
    check_process_finishes(process, binary=cmd[0])
Example #11
0
def _run_estscan(seqs, pep_out_fpath, dna_out_fpath, matrix_fpath):
    '''It runs estscan in the input seqs

    The seqs are written to a fasta file that is given to estscan together
    with the matrix (-M). pep_out_fpath is used for the -t option and
    dna_out_fpath for the -o one.
    '''
    seq_fhand = write_seqs(seqs, file_format='fasta')
    try:
        # estscan reads the file by name, so the content has to be flushed
        seq_fhand.flush()
        binary = get_binary_path('estscan')

        cmd = [binary, '-t', pep_out_fpath, '-o', dna_out_fpath, '-M',
               matrix_fpath, seq_fhand.name]
        process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        check_process_finishes(process, binary=cmd[0])
    finally:
        # the sequence file is closed even when estscan fails
        seq_fhand.close()
Example #12
0
def _run_estscan(seqs, pep_out_fpath, dna_out_fpath, matrix_fpath):
    '''It runs estscan in the input seqs

    seqs: sequences to analyze, they are written to a fasta file
    pep_out_fpath: path used for the -t option of estscan
    dna_out_fpath: path used for the -o option of estscan
    matrix_fpath: path used for the -M option of estscan
    '''
    seq_fhand = write_seqs(seqs, file_format='fasta')
    try:
        # the file is read by another process, flush it before running
        seq_fhand.flush()
        binary = get_binary_path('estscan')

        cmd = [binary, '-t', pep_out_fpath, '-o', dna_out_fpath, '-M',
               matrix_fpath, seq_fhand.name]
        process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        check_process_finishes(process, binary=cmd[0])
    finally:
        # do not leave the input file open if anything above raises
        seq_fhand.close()
Example #13
0
def map_process_to_bam(map_process, bam_fpath, log_fpath=None):
    '''It writes in a bam file the output of a mapping process.

    The stdout of map_process is used as the stdin of samtools view, that
    creates bam_fpath. The stderr of samtools goes to log_fpath or, if it is
    not given, to a temporary file.
    '''
    stderr = (open(log_fpath, 'w') if log_fpath is not None
              else NamedTemporaryFile(suffix='.stderr'))

    samtools_binary = get_binary_path('samtools')
    view_cmd = [samtools_binary, 'view', '-h', '-b', '-S', '-']
    view_cmd += ['-o', bam_fpath]

    to_bam = popen(view_cmd, stdin=map_process.stdout, stderr=stderr)
    # Allow the map process to receive a SIGPIPE if samtools exits.
    map_process.stdout.close()
    to_bam.communicate()
Example #14
0
def get_or_create_bowtie2_index(fpath, directory=None):
    '''It returns the path of the bowtie2 index, creating it if required.

    The index is located next to fpath or, if a directory is given, inside
    that directory with the file name of fpath.
    '''
    builder = get_binary_path('bowtie2-build')

    index_fpath = fpath
    if directory is not None:
        index_fpath = os.path.join(directory, os.path.basename(fpath))

    # nothing to build if the index is already there
    if _bowtie2_index_exists(index_fpath):
        return index_fpath

    build_cmd = [builder, '-f', fpath, index_fpath]
    build = popen(build_cmd, stdout=PIPE, stderr=PIPE)
    check_process_finishes(build, binary=builder)
    return index_fpath
Example #15
0
def map_with_tophat(index_fpath, out_dir, unpaired_fpath=None,
                    paired_fpaths=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None, mate_inner_dist=None,
                    mate_std_dev=None):
    '''It maps with tophat and it waits until the mapping is done.

    index_fpath: index given to tophat
    out_dir: output directory (-o)
    unpaired_fpath: file with unpaired reads
    paired_fpaths: list with the files of the paired reads. They require
                   mate_inner_dist and mate_std_dev
    threads: if given, it is handed to get_num_threads to fill -p
    log_fpath: file for the stderr of tophat. If it is not given a
               temporary file is used
    extra_params: list of extra options. The standard options that are
                  missing in it are added. The given list is not modified
    readgroup: dict with the read group tags (ID, LB, SM and PL)

    It raises a RuntimeError if paired and unpaired reads are mixed, if
    the mate distances are missing for paired reads or if the readgroup
    has a tag that is not supported. A failed tophat run is only reported
    with a message in the standard error.
    '''
    if unpaired_fpath is not None and paired_fpaths is not None:
        msg = "Tophat devs don't recommend mixing paired and unpaired reads"
        raise RuntimeError(msg)

    # We work with a copy, the list of the caller has to be kept as it is
    extra_params = [] if extra_params is None else list(extra_params)

    standar_params = ['--b2-very-sensitive', '--no-discordant', '--no-mixed',
                      '--keep-fasta-order']
    for standar_param in standar_params:
        if standar_param not in extra_params:
            extra_params.append(standar_param)
    if threads is not None:
        extra_params.extend(['-p', str(get_num_threads(threads))])

    if paired_fpaths:
        if mate_inner_dist is None or mate_std_dev is None:
            raise RuntimeError('with paires reads inner-dist is mandatory')
        extra_params.extend(['-r', str(mate_inner_dist), '--mate-std-dev',
                             str(mate_std_dev)])

    extra_params.extend(['-o', out_dir])
    if readgroup is not None:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            extra_params.extend(['--rg-{}'.format(TOPHAT_RG_TRANSLATOR[key]),
                                 value])
    cmd = ['tophat']
    cmd.extend(extra_params)
    cmd.append(index_fpath)

    if paired_fpaths:
        cmd.extend(paired_fpaths)

    if unpaired_fpath:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')
    try:
        tophat = popen(cmd, stderr=stderr, stdout=PIPE)
        tophat.communicate()
    finally:
        # tophat is done with the log, the handle is not needed anymore
        stderr.close()

    if tophat.returncode:
        sys.stderr.write('Error in tophat process\n')
Example #16
0
def map_with_tophat(index_fpath, out_dir, unpaired_fpath=None,
                    paired_fpaths=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None, mate_inner_dist=None,
                    mate_std_dev=None):
    '''It runs tophat and it waits for it to finish.

    The reads can be unpaired (unpaired_fpath) or paired (paired_fpaths),
    but not both. For the paired ones mate_inner_dist and mate_std_dev are
    mandatory. The results go to out_dir (-o) and the stderr of tophat to
    log_fpath or, if it is not given, to a temporary file.

    The standard options not found in extra_params are added to the
    command, the list given by the caller is not modified. If threads is
    given it is handed to get_num_threads to fill -p. The readgroup is a
    dict that can have the ID, LB, SM and PL tags.

    It raises a RuntimeError for mixed reads, for missing mate distances
    and for a readgroup tag that is not supported.
    '''
    if unpaired_fpath is not None and paired_fpaths is not None:
        msg = "Tophat devs don't recommend mixing paired and unpaired reads"
        raise RuntimeError(msg)

    # A copy is used because the options are appended in place, otherwise
    # the list of the caller would grow with every call
    extra_params = [] if extra_params is None else list(extra_params)

    standar_params = ['--b2-very-sensitive', '--no-discordant', '--no-mixed',
                      '--keep-fasta-order']
    for standar_param in standar_params:
        if standar_param not in extra_params:
            extra_params.append(standar_param)
    if threads is not None:
        extra_params.extend(['-p', str(get_num_threads(threads))])

    if paired_fpaths:
        if mate_inner_dist is None or mate_std_dev is None:
            raise RuntimeError('with paires reads inner-dist is mandatory')
        extra_params.extend(['-r', str(mate_inner_dist), '--mate-std-dev',
                             str(mate_std_dev)])

    extra_params.extend(['-o', out_dir])
    if readgroup is not None:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            extra_params.extend(['--rg-{}'.format(TOPHAT_RG_TRANSLATOR[key]),
                                 value])
    cmd = ['tophat']
    cmd.extend(extra_params)
    cmd.append(index_fpath)

    if paired_fpaths:
        cmd.extend(paired_fpaths)

    if unpaired_fpath:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')
    try:
        tophat = popen(cmd, stderr=stderr, stdout=PIPE)
        tophat.communicate()
    finally:
        # the log handle is closed once tophat has finished
        stderr.close()
Example #17
0
def map_with_bwamem(index_fpath, unpaired_fpath=None, paired_fpaths=None,
                    interleave_fpath=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None):
    '''It maps with bwa mem algorithm

    It returns the process created by popen without waiting for it. The
    process is started with its stdout connected to a pipe.

    Only one kind of input is accepted in each call: unpaired_fpath (one
    file), paired_fpaths (list of files) or interleave_fpath (one file,
    the -p option is added for it).

    threads: value handed to get_num_threads to fill the -t option
    log_fpath: file for the stderr of bwa. If it is not given a temporary
               file is used
    extra_params: list of extra options for bwa. A -p found in it is
                  removed. The given list is not modified
    readgroup: dict with the ID, SM, PL and LB tags, used to build the -R
               option

    It raises a RuntimeError if no input is given or if more than one kind
    of input is given.
    '''
    interleave = False
    num_called_fpaths = 0
    in_fpaths = []
    if unpaired_fpath is not None:
        num_called_fpaths += 1
        in_fpaths.append(unpaired_fpath)
    if paired_fpaths is not None:
        num_called_fpaths += 1
        in_fpaths.extend(paired_fpaths)
    if interleave_fpath is not None:
        num_called_fpaths += 1
        in_fpaths.append(interleave_fpath)
        interleave = True

    if num_called_fpaths == 0:
        raise RuntimeError('At least one file to map is required')
    if num_called_fpaths > 1:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    # We work with a copy, the options are removed and added in place and
    # the list of the caller should not change
    extra_params = [] if extra_params is None else list(extra_params)

    # -p is decided here, it depends only on the kind of input
    if '-p' in extra_params:
        extra_params.remove('-p')

    if interleave:
        extra_params.append('-p')

    if readgroup is not None:
        rg_str = '@RG\tID:{ID}\tSM:{SM}\tPL:{PL}\tLB:{LB}'.format(**readgroup)
        extra_params.extend(['-R', rg_str])

    binary = get_binary_path('bwa')
    cmd = [binary, 'mem', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)
    cmd.extend(in_fpaths)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')
    bwa = popen(cmd, stderr=stderr, stdout=PIPE)
    return bwa
Example #18
0
def map_process_to_bam(map_process, bam_fpath, log_fpath=None,
                       tempdir=None):
    '''It converts into bam the sam that a mapping process writes to stdout.

    The conversion is done by an external samtools view process that reads
    the stdout of map_process and creates bam_fpath. The stderr of samtools
    goes to log_fpath or, if it is not given, to a temporary file.
    tempdir is accepted, but it is not used.
    '''
    if log_fpath is not None:
        stderr = open(log_fpath, 'w')
    else:
        stderr = NamedTemporaryFile(suffix='.stderr')

    sam_to_bam_cmd = [get_binary_path('samtools'), 'view']
    sam_to_bam_cmd.extend(['-h', '-b', '-S', '-'])
    sam_to_bam_cmd.extend(['-o', bam_fpath])

    sam_stream = map_process.stdout
    samtools = popen(sam_to_bam_cmd, stdin=sam_stream, stderr=stderr)
    # Allow the map process to receive a SIGPIPE if samtools exits.
    sam_stream.close()
    samtools.communicate()
Example #19
0
def map_process_to_sortedbam(map_process, out_fpath, key='coordinate',
                             log_fpath=None, tempdir=None):
    '''It sorts with picard SortSam the output of a mapping process.

    SortSam reads the stdout of map_process and writes out_fpath sorted by
    the given key. Its stderr goes to log_fpath or, if it is not given, to a
    temporary file. tempdir is the TMP_DIR of picard, by default the
    temporary directory of the system.
    '''
    stderr = (open(log_fpath, 'w') if log_fpath is not None
              else NamedTemporaryFile(suffix='.stderr'))

    tmp_dir = tempfile.gettempdir() if tempdir is None else tempdir

    sort_cmd = ['java', '-jar', get_setting("PICARD_JAR"), 'SortSam']
    sort_cmd.append('I=/dev/stdin')
    sort_cmd.append('O=' + out_fpath)
    sort_cmd.append('SO=' + key)
    sort_cmd.append('TMP_DIR=' + tmp_dir)
    sort_cmd.append('VALIDATION_STRINGENCY=LENIENT')

    sorter = popen(sort_cmd, stdin=map_process.stdout, stderr=stderr)
    # our copy of the pipe is not needed once the sorter is reading from it
    map_process.stdout.close()
    sorter.communicate()
Example #20
0
def map_process_to_sortedbam(map_process, out_fpath, key='coordinate',
                             log_fpath=None, tempdir=None):
    '''It writes in a sorted bam the output of a mapping process.

    The sort is done by the SortSam.jar found in the PICARD_TOOLS_DIR
    setting, that reads the stdout of map_process. The result is written
    to out_fpath sorted by key. The stderr of picard goes to log_fpath or,
    if it is not given, to a temporary file. If no tempdir is given the
    temporary directory of the system is used as TMP_DIR.
    '''
    if log_fpath is not None:
        stderr = open(log_fpath, 'w')
    else:
        stderr = NamedTemporaryFile(suffix='.stderr')

    if tempdir is None:
        tempdir = tempfile.gettempdir()

    sortsam_jar = os.path.join(get_setting("PICARD_TOOLS_DIR"), 'SortSam.jar')
    picard_args = ['I=/dev/stdin', 'O=' + out_fpath, 'SO=' + key,
                   'TMP_DIR=' + tempdir, 'VALIDATION_STRINGENCY=LENIENT']
    cmd = ['java', '-jar', sortsam_jar] + picard_args

    sam_stream = map_process.stdout
    picard = popen(cmd, stdin=sam_stream, stderr=stderr)
    sam_stream.close()
    picard.communicate()
Example #21
0
def map_with_bowtie2(index_fpath, paired_fpaths=None,
                     unpaired_fpath=None, readgroup=None, threads=None,
                     log_fpath=None, preset='very-sensitive-local',
                     extra_params=None):
    '''It starts a bowtie2 mapping and returns the running process.

    paired_fpaths holds the two files of a pair (-1 and -2)
    unpaired_fpath is one file with unpaired reads (-U)
    The readgroup tags (ID, LB, SM, PL) are only used if there is an ID.
    The stderr of bowtie2 goes to log_fpath or to a temporary file.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')

    readgroup = {} if readgroup is None else readgroup
    extra_params = [] if extra_params is None else extra_params

    cmd = [get_binary_path('bowtie2'), '-x', index_fpath]
    cmd.append('--{0}'.format(preset))
    cmd.extend(['-p', str(get_num_threads(threads))])
    cmd.extend(extra_params)

    if unpaired_fpath:
        cmd += ['-U', unpaired_fpath]
    if paired_fpaths:
        cmd += ['-1', paired_fpaths[0], '-2', paired_fpaths[1]]

    if 'ID' in readgroup:
        for tag, value in readgroup.items():
            if tag == 'ID':
                cmd.extend(['--rg-id', value])
            elif tag in ('LB', 'SM', 'PL'):
                cmd.extend(['--rg', '{0}:{1}'.format(tag, value)])
            else:
                msg = 'The readgroup header tag is not valid: {}'.format(tag)
                raise RuntimeError(msg)

    stderr = (open(log_fpath, 'w') if log_fpath is not None
              else NamedTemporaryFile(suffix='.stderr'))

    # the sam is left in the stdout pipe of the returned process
    return popen(cmd, stderr=stderr, stdout=PIPE)
Example #22
0
def do_blast(query_fpath, db_fpath, program, out_fpath, params=None):
    """It runs a blast search and waits for it.

    The evalue, task and outfmt options are taken from params, their
    defaults are 0.001, megablast and 5. The program has to be one of the
    blast+ binaries listed below, otherwise a ValueError is raised.
    """
    valid_tasks = ("blastn", "blastn-short", "dc-megablast", "megablast", "rmblastn")
    valid_programs = ("blastn", "blastp", "blastx", "tblastx", "tblastn")

    params = params or {}
    evalue = params.get("evalue", 0.001)
    task = params.get("task", "megablast")
    outfmt = str(params.get("outfmt", 5))
    assert task in valid_tasks

    if program not in valid_programs:
        raise ValueError("The given program is invalid: " + str(program))

    blast_binary = get_binary_path(program)
    cmd = [blast_binary]
    cmd += ["-query", query_fpath, "-db", db_fpath, "-out", out_fpath]
    cmd += ["-evalue", str(evalue), "-task", task]
    cmd += ["-outfmt", outfmt]

    blast = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(blast, binary=blast_binary)
Example #23
0
def map_process_to_sortedbam(map_process, out_fpath, key='coordinate',
                             stderr_fhand=None, tempdir=None):
    '''It sorts with picard SortSam the output of a mapping process.

    map_process: process that writes the sam in its stdout
    out_fpath: path of the sorted file to create
    key: sort order given to picard (SO)
    stderr_fhand: file for the stderr of picard. If it is not given a
                  temporary file is used
    tempdir: TMP_DIR for picard, by default the temporary directory of
             the system

    It raises a RuntimeError if the mapping or the sort process fail.
    '''
    if stderr_fhand is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = stderr_fhand

    if tempdir is None:
        tempdir = tempfile.gettempdir()
    picard_jar = get_setting("PICARD_JAR")
    cmd = ['java', '-jar', picard_jar, 'SortSam', 'I=/dev/stdin',
           'O=' + out_fpath, 'SO=' + key, 'TMP_DIR=' + tempdir,
           'VALIDATION_STRINGENCY=LENIENT']
    sort = popen(cmd, stdin=map_process.stdout, stderr=stderr)
    map_process.stdout.close()
    sort.communicate()

    # The returncode is None until the process is waited for, without the
    # wait the check of the mapping would never find an error
    map_process.wait()
    if map_process.returncode:
        raise RuntimeError('Error in mapping process')

    if sort.returncode:
        raise RuntimeError('Error in Sort process')
Example #24
0
def map_with_hisat2(index_fpath, paired_fpaths=None,
                    unpaired_fpath=None, readgroup=None, threads=None,
                    log_fhand=None, extra_params=None):
    '''It starts a hisat2 mapping and returns the running process.

    paired_fpaths holds the two files of a pair (-1 and -2)
    unpaired_fpath is one file with unpaired reads (-U)
    The readgroup tags (ID, LB, SM, PL) are only used if there is an ID.
    log_fhand is used as the stderr of hisat2.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')

    readgroup = {} if readgroup is None else readgroup
    extra_params = [] if extra_params is None else extra_params

    hisat2_binary = get_binary_path('hisat2')
    n_threads = str(get_num_threads(threads))
    cmd = [hisat2_binary, '-x', index_fpath, '-p', n_threads]
    cmd += extra_params

    if unpaired_fpath:
        cmd += ['-U', unpaired_fpath]
    if paired_fpaths:
        cmd += ['-1', paired_fpaths[0], '-2', paired_fpaths[1]]

    if 'ID' in readgroup:
        for tag, value in readgroup.items():
            if tag == 'ID':
                cmd.extend(['--rg-id', value])
            elif tag in ('LB', 'SM', 'PL'):
                cmd.extend(['--rg', '{0}:{1}'.format(tag, value)])
            else:
                msg = 'The readgroup header tag is not valid: {}'.format(tag)
                raise RuntimeError(msg)

    # the sam is left in the stdout pipe of the returned process
    return popen(cmd, stderr=log_fhand, stdout=PIPE)
Example #25
0
def _do_blast_local(query_fpath, db_fpath, program, out_fpath, params=None):
    '''It does a blast

    The evalue and the task come from _parse_blast_params. The outfmt is
    taken from params, 5 by default. Any other item left in params is given
    to blast as a -key value option. The params of the caller are not
    modified.

    It raises a ValueError if the program is not a valid blast program.
    '''
    # We work with a copy. The outfmt is removed from the dict, so if the
    # dict of the caller were used a second call would lose its outfmt
    params = dict(params) if params else {}
    evalue, task = _parse_blast_params(params, program)

    outfmt = params.pop('outfmt', 5)

    if program not in ('blastn', 'blastp', 'blastx', 'tblastx', 'tblastn'):
        raise ValueError('The given program is invalid: ' + str(program))
    binary = get_binary_path(program)
    cmd = [binary, '-query', query_fpath, '-db', db_fpath, '-out', out_fpath]
    cmd.extend(['-evalue', str(evalue), '-outfmt', str(outfmt)])
    if task:
        cmd.extend(['-task', task])
    # items is available in every python version, viewitems is not
    for key, value in params.items():
        cmd.extend(('-' + key, str(value)))
    process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(process, binary=cmd[0])
Example #26
0
def _do_blast_local(query_fpath, db_fpath, program, out_fpath, params=None):
    '''It does a blast

    query_fpath, db_fpath and out_fpath are used for the -query, -db and
    -out options. The evalue and the task are calculated by
    _parse_blast_params and the outfmt is read from params (5 by default).
    The rest of the params are added to the command as -key value.

    The dict given by the caller is left untouched. A ValueError is raised
    for a program that is not a valid blast program.
    '''
    # Copy the params before removing anything from them, a caller that
    # reuses its dict would get the default outfmt in the following calls
    params = dict(params) if params else {}
    evalue, task = _parse_blast_params(params, program)

    outfmt = params.pop('outfmt', 5)

    if program not in ('blastn', 'blastp', 'blastx', 'tblastx', 'tblastn'):
        raise ValueError('The given program is invalid: ' + str(program))
    binary = get_binary_path(program)
    cmd = [binary, '-query', query_fpath, '-db', db_fpath, '-out', out_fpath]
    cmd.extend(['-evalue', str(evalue), '-outfmt', str(outfmt)])
    if task:
        cmd.extend(['-task', task])
    # viewitems only exists in python 2 dicts, items works everywhere
    for key, value in params.items():
        cmd.extend(('-' + key, str(value)))
    process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(process, binary=cmd[0])
Example #27
0
def _do_blast_local(query_fpath, db_fpath, program, out_fpath, params=None):
    """It does a blast

    The evalue and the task are obtained with _parse_blast_params, the
    outfmt is taken from params and it is 5 by default. Every other param
    is added to the blast command as a -key value option.

    The params given by the caller are not modified. If the program is not
    one of the valid blast programs a ValueError is raised.
    """
    # The outfmt is taken out of the params, so a private copy is used to
    # keep the dict of the caller complete for its next call
    params = dict(params) if params else {}
    evalue, task = _parse_blast_params(params, program)

    outfmt = params.pop("outfmt", 5)

    if program not in ("blastn", "blastp", "blastx", "tblastx", "tblastn"):
        raise ValueError("The given program is invalid: " + str(program))
    binary = get_binary_path(program)
    cmd = [binary, "-query", query_fpath, "-db", db_fpath, "-out", out_fpath]
    cmd.extend(["-evalue", str(evalue), "-outfmt", str(outfmt)])
    if task:
        cmd.extend(["-task", task])
    # items instead of viewitems, the latter is python 2 only
    for key, value in params.items():
        cmd.extend(("-" + key, str(value)))
    process = popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    check_process_finishes(process, binary=cmd[0])
Example #28
0
def map_with_bwasw(index_fpath,
                   bam_fpath,
                   unpaired_fpath=None,
                   paired_fpaths=None,
                   readgroup=None,
                   threads=None,
                   log_fpath=None,
                   extra_params=None):
    '''It maps with bwa bwasw algorithm and it creates a bam file.

    The reads are given with unpaired_fpath (one file) or with
    paired_fpaths (list of files), but not with both. The output of bwa is
    piped into picard AddOrReplaceReadGroups if there is a readgroup (a
    dict with the ID, LB, PL, SM and PU tags) or into samtools view if
    there is not. The stderr of both processes goes to log_fpath or, if it
    is not given, to a temporary file.

    threads is handed to get_num_threads to fill the -t option and
    extra_params is a list of extra options for bwa.

    It raises a RuntimeError if no reads are given, if paired and unpaired
    reads are given at once or if any of the two processes fails. In the
    last case the message is the content of the stderr file.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    if paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'bwasw', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)

    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bwa = popen(cmd, stderr=stderr, stdout=PIPE)

        # add readgroup using picard
        picard_tools = get_setting("PICARD_TOOLS_DIR")
        if readgroup:
            cmd = [
                'java', '-jar',
                os.path.join(picard_tools, 'AddOrReplaceReadGroups.jar'),
                'INPUT=/dev/stdin',
                'OUTPUT={0}'.format(bam_fpath),
                'RGID={0}'.format(readgroup['ID']),
                'RGLB={0}'.format(readgroup['LB']),
                'RGPL={0}'.format(readgroup['PL']),
                'RGSM={0}'.format(readgroup['SM']),
                'RGPU={0}'.format(readgroup['PU']),
                'VALIDATION_STRINGENCY=LENIENT'
            ]
        else:
            cmd = [
                get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
                '-o', bam_fpath
            ]

        samtools = popen(cmd, stdin=bwa.stdout, stderr=stderr)
        # Allow bwa to receive a SIGPIPE if samtools exits.
        bwa.stdout.close()
        samtools.communicate()
        # Nobody has waited for bwa yet, so its returncode would be None
        # and its failures would not be detected by the check below
        bwa.wait()
        if bwa.returncode or samtools.returncode:
            # read the log while the temporary file still exists
            with open(stderr.name) as log_fhand:
                raise RuntimeError(log_fhand.read())
    finally:
        stderr.close()
Example #29
0
def map_with_bowtie2(index_fpath,
                     bam_fpath,
                     paired_fpaths=None,
                     unpaired_fpaths=None,
                     readgroup=None,
                     threads=None,
                     log_fpath=None,
                     preset='very-sensitive-local',
                     extra_params=None):
    '''It maps with bowtie2 and stores the result in a bam file.

    The output of bowtie2 is piped into samtools view, that creates
    bam_fpath. The stderr of both processes goes to log_fpath or, if it is
    not given, to a temporary file.

    paired_fpaths is a list of tuples, in which each tuple are paired files
    unpaired_fpaths is a list of files
    readgroup is a dict with the ID, LB, SM and PL tags, it is ignored
    unless it has an ID
    threads is handed to get_num_threads to fill the -p option
    preset is added to the command as --preset
    extra_params is a list of extra options for bowtie2

    It raises a RuntimeError if there are no files to map or if the
    readgroup has a tag that is not supported.
    '''
    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    # Empty lists are rejected as well as None, in both cases there are no
    # reads to map
    if not paired_fpaths and not unpaired_fpaths:
        raise RuntimeError('At least one file to map is required')

    binary = get_binary_path('bowtie2')
    cmd = [
        binary, '-x', index_fpath, '--{0}'.format(preset), '-p',
        str(get_num_threads(threads))
    ]

    cmd.extend(extra_params)
    if unpaired_fpaths:
        cmd.extend(['-U', ','.join(unpaired_fpaths)])
    if paired_fpaths:
        plus = [pairs[0] for pairs in paired_fpaths]
        minus = [pairs[1] for pairs in paired_fpaths]
        cmd.extend(['-1', ','.join(plus), '-2', ','.join(minus)])

    if 'ID' in readgroup:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            if key == 'ID':
                cmd.extend(['--rg-id', value])
            else:
                cmd.extend(['--rg', '{0}:{1}'.format(key, value)])

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bowtie2 = popen(cmd, stderr=stderr, stdout=PIPE)
        cmd = [
            get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-', '-o',
            bam_fpath
        ]

        samtools = popen(cmd, stdin=bowtie2.stdout, stderr=stderr)
        # Allow bowtie2 to receive a SIGPIPE if samtools exits.
        bowtie2.stdout.close()
        samtools.communicate()
        # once samtools has finished bowtie2 is collected too, otherwise
        # the process would be left without anybody waiting for it
        bowtie2.wait()
    finally:
        # the log handle was opened here and it is closed here
        stderr.close()