Ejemplo n.º 1
0
def _realign_bam(bam_fpath, reference_fpath, out_bam_fpath, threads=False):
    '''It realigns the bam using GATK local realignment around indels.

    Before running GATK the helpers for the reference sam index, the
    reference picard dict and the bam index are called.

    GATK (the jar given by the GATK_JAR setting) is run twice:
    RealignerTargetCreator writes the intervals to realign into a temporary
    file and IndelRealigner uses those intervals to write out_bam_fpath.

    threads is only taken into account when it is truthy and greater than
    one; in that case '-nt' is added to the IndelRealigner command.

    The exception raised by check_call (presumably subprocess.check_call)
    is propagated if any of the GATK runs fails.
    '''
    # reference sam index
    _create_sam_reference_index(reference_fpath)

    # reference picard dict
    _create_picard_dict(reference_fpath)

    # bam index
    index_bam(bam_fpath)

    gatk_jar = get_setting('GATK_JAR')

    # The context managers close the three temporary files even when one of
    # the GATK runs fails, so no file handle is leaked.
    with NamedTemporaryFile(suffix='.intervals') as intervals_fhand, \
            NamedTemporaryFile(suffix='picard.stderr') as stderr, \
            NamedTemporaryFile(suffix='picard.stdout') as stdout:
        # the intervals to realign
        cmd = ['java', '-jar', gatk_jar, '-T', 'RealignerTargetCreator',
               '-I', bam_fpath, '-R', reference_fpath,
               '-o', intervals_fhand.name]
        check_call(cmd, stderr=stderr, stdout=stdout)

        # the realignment itself
        cmd = ['java', '-jar', gatk_jar, '-I', bam_fpath,
               '-R', reference_fpath, '-T', 'IndelRealigner',
               '-targetIntervals', intervals_fhand.name,
               '-o', out_bam_fpath]

        # NOTE(review): confirm in the GATK documentation that IndelRealigner
        # accepts -nt.
        if threads and threads > 1:
            cmd.extend(['-nt', str(get_num_threads(threads))])
        check_call(cmd, stderr=stderr, stdout=stdout)
Ejemplo n.º 2
0
def _realign_bam(bam_fpath, reference_fpath, out_bam_fpath, threads=False):
    '''It realigns the bam using GATK local realignment around indels.

    Before running GATK the helpers for the reference sam index, the
    reference picard dict and the bam index are called.

    GATK (the jar given by the GATK_JAR setting) is run twice:
    RealignerTargetCreator writes the intervals to realign into a temporary
    file and IndelRealigner uses those intervals to write out_bam_fpath.

    threads is only taken into account when it is truthy and greater than
    one; in that case '-nt' is added to the IndelRealigner command.

    The exception raised by check_call (presumably subprocess.check_call)
    is propagated if any of the GATK runs fails.
    '''
    # reference sam index
    _create_sam_reference_index(reference_fpath)

    # reference picard dict
    _create_picard_dict(reference_fpath)

    # bam index
    index_bam(bam_fpath)

    gatk_jar = get_setting('GATK_JAR')

    # The context managers close the three temporary files even when one of
    # the GATK runs fails, so no file handle is leaked.
    with NamedTemporaryFile(suffix='.intervals') as intervals_fhand, \
            NamedTemporaryFile(suffix='picard.stderr') as stderr, \
            NamedTemporaryFile(suffix='picard.stdout') as stdout:
        # the intervals to realign
        cmd = [
            'java', '-jar', gatk_jar, '-T', 'RealignerTargetCreator', '-I',
            bam_fpath, '-R', reference_fpath, '-o', intervals_fhand.name
        ]
        check_call(cmd, stderr=stderr, stdout=stdout)

        # the realignment itself
        cmd = [
            'java', '-jar', gatk_jar, '-I', bam_fpath, '-R',
            reference_fpath, '-T', 'IndelRealigner', '-targetIntervals',
            intervals_fhand.name, '-o', out_bam_fpath
        ]

        # NOTE(review): confirm in the GATK documentation that IndelRealigner
        # accepts -nt.
        if threads and threads > 1:
            cmd.extend(['-nt', str(get_num_threads(threads))])
        check_call(cmd, stderr=stderr, stdout=stdout)
Ejemplo n.º 3
0
def map_with_bwamem(index_fpath, unpaired_fpath=None, paired_fpaths=None,
                    threads=None, log_fpath=None, extra_params=None):
    '''It maps with the bwa mem algorithm.

    Exactly one of unpaired_fpath (a path) or paired_fpaths (an iterable of
    paths) has to be given. extra_params is an optional list of extra
    command line arguments that are placed after the index path.

    The stderr of bwa is written to log_fpath or, if it is None, to a
    temporary file.

    It returns the process object created by popen, with its stdout set to
    PIPE. The function does not wait for the process.

    It raises a RuntimeError if no input is given or if both kinds of input
    are given.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    if paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    binary = get_binary_path('bwa')
    cmd = [binary, 'mem', '-t', str(get_num_threads(threads)), index_fpath]
    if extra_params is not None:
        cmd.extend(extra_params)

    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    # The log handle is left open on purpose: the returned process is still
    # running and writing to it when this function returns.
    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    return popen(cmd, stderr=stderr, stdout=PIPE)
Ejemplo n.º 4
0
def map_with_bowtie2(index_fpath, bam_fpath, paired_fpaths=None,
                     unpaired_fpaths=None, readgroup=None, threads=None,
                     log_fpath=None, preset='very-sensitive-local',
                     extra_params=None):
    '''It maps with bowtie2 and writes the result to bam_fpath.

    paired_fpaths is a list of tuples, each tuple holds the two files of a
    pair. unpaired_fpaths is a list of files. At least one of them is
    required.

    readgroup is a dict with read group tags. It is only used when it has an
    'ID' key, and then only the ID, LB, SM and PL keys are allowed.

    preset is the name of the bowtie2 preset, it is passed as '--<preset>'.
    extra_params is an optional list of extra bowtie2 arguments.

    The bowtie2 output is piped to samtools view, that writes the bam. The
    stderr of both processes goes to log_fpath or, if it is None, to a
    temporary file.

    It raises a RuntimeError if no input file is given or if readgroup has a
    tag that is not valid.
    '''
    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    if paired_fpaths is None and unpaired_fpaths is None:
        raise RuntimeError('At least one file to map is required')

    binary = get_binary_path('bowtie2')
    cmd = [binary, '-x', index_fpath, '--{0}'.format(preset),
           '-p', str(get_num_threads(threads))]

    cmd.extend(extra_params)
    if unpaired_fpaths:
        cmd.extend(['-U', ','.join(unpaired_fpaths)])
    if paired_fpaths:
        plus = [pairs[0] for pairs in paired_fpaths]
        minus = [pairs[1] for pairs in paired_fpaths]
        cmd.extend(['-1', ','.join(plus), '-2', ','.join(minus)])

    if 'ID' in readgroup:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            if key == 'ID':
                cmd.extend(['--rg-id', value])
            else:
                cmd.extend(['--rg', '{0}:{1}'.format(key, value)])

    # Both commands are prepared before any process is started, so a failure
    # while looking for samtools does not leave bowtie2 running.
    samtools_cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S',
                    '-', '-o', bam_fpath]

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bowtie2 = popen(cmd, stderr=stderr, stdout=PIPE)
        samtools = popen(samtools_cmd, stdin=bowtie2.stdout, stderr=stderr)
        # Allow bowtie2 to receive a SIGPIPE if samtools exits.
        bowtie2.stdout.close()
        samtools.communicate()
        # samtools is done, so bowtie2 has nothing left to write to: wait
        # for it so the process is collected.
        bowtie2.wait()
    finally:
        # The log is not needed once both processes are done.
        stderr.close()
Ejemplo n.º 5
0
def map_with_tophat(index_fpath, out_dir, unpaired_fpath=None,
                    paired_fpaths=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None, mate_inner_dist=None,
                    mate_std_dev=None):
    '''It maps with tophat and waits for it to finish.

    unpaired_fpath is a path and paired_fpaths an iterable of paths; they
    can not be given together. With paired reads mate_inner_dist and
    mate_std_dev are mandatory.

    extra_params is an optional list of extra tophat arguments. The list is
    not modified. Several standard parameters are added if they are not
    already in it, '-p' is added when threads is not None and '-o out_dir'
    is always added.

    readgroup is an optional dict, only the ID, LB, SM and PL keys are
    allowed.

    The stderr of tophat goes to log_fpath or, if it is None, to a temporary
    file. If tophat ends with a non-zero return code a message is written
    to sys.stderr, no exception is raised.

    It raises a RuntimeError if paired and unpaired reads are mixed, if the
    mate distances are missing for paired reads or if readgroup has a tag
    that is not valid.
    '''
    if unpaired_fpath is not None and paired_fpaths is not None:
        msg = "Tophat devs don't recommend mixing paired and unpaired reads"
        raise RuntimeError(msg)

    # Work on a copy: the list given by the caller must not be modified.
    params = [] if extra_params is None else list(extra_params)

    standar_params = ['--b2-very-sensitive', '--no-discordant', '--no-mixed',
                      '--keep-fasta-order']
    for standar_param in standar_params:
        if standar_param not in params:
            params.append(standar_param)
    if threads is not None:
        params.extend(['-p', str(get_num_threads(threads))])

    if paired_fpaths:
        if mate_inner_dist is None or mate_std_dev is None:
            raise RuntimeError('with paires reads inner-dist is mandatory')
        params.extend(['-r', str(mate_inner_dist), '--mate-std-dev',
                       str(mate_std_dev)])

    params.extend(['-o', out_dir])
    if readgroup is not None:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            params.extend(['--rg-{}'.format(TOPHAT_RG_TRANSLATOR[key]),
                           value])

    cmd = ['tophat']
    cmd.extend(params)
    cmd.append(index_fpath)

    if paired_fpaths:
        cmd.extend(paired_fpaths)

    if unpaired_fpath:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        tophat = popen(cmd, stderr=stderr, stdout=PIPE)
        tophat.communicate()
    finally:
        # tophat is done (or could not be started): close the log.
        stderr.close()

    if tophat.returncode:
        sys.stderr.write('Error in tophat process\n')
Ejemplo n.º 6
0
def map_with_bwasw(index_fpath, bam_fpath, unpaired_fpath=None,
                   paired_fpaths=None, readgroup=None, threads=None,
                   log_fpath=None, extra_params=None):
    '''It maps with the bwa bwasw algorithm and writes a bam to bam_fpath.

    Exactly one of unpaired_fpath (a path) or paired_fpaths (an iterable of
    paths) has to be given. extra_params is an optional list of extra bwa
    arguments.

    The bwa output is piped to a second process. If readgroup is not empty
    that process is picard AddOrReplaceReadGroups (taken from the
    PICARD_TOOLS_DIR setting) and readgroup must have the ID, LB, PL, SM and
    PU keys. Otherwise the process is samtools view.

    The stderr of both processes goes to log_fpath or, if it is None, to a
    temporary file.

    It raises a RuntimeError if no input is given, if both kinds of input
    are given or, with the content of the log as message, if any of the two
    processes ends with a non-zero return code.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    elif paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    bwa_cmd = [binary, 'bwasw', '-t', str(get_num_threads(threads)),
               index_fpath]
    bwa_cmd.extend(extra_params)

    if paired_fpaths is not None:
        bwa_cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        bwa_cmd.append(unpaired_fpath)

    # The second command is prepared before bwa is started, so a missing
    # readgroup key or binary does not leave bwa running.
    if readgroup:
        # add readgroup using picard
        picard_tools = get_setting("PICARD_TOOLS_DIR")
        bam_cmd = ['java', '-jar',
                   os.path.join(picard_tools, 'AddOrReplaceReadGroups.jar'),
                   'INPUT=/dev/stdin', 'OUTPUT={0}'.format(bam_fpath),
                   'RGID={0}'.format(readgroup['ID']),
                   'RGLB={0}'.format(readgroup['LB']),
                   'RGPL={0}'.format(readgroup['PL']),
                   'RGSM={0}'.format(readgroup['SM']),
                   'RGPU={0}'.format(readgroup['PU']),
                   'VALIDATION_STRINGENCY=LENIENT']
    else:
        bam_cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S',
                   '-', '-o', bam_fpath]

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bwa = popen(bwa_cmd, stderr=stderr, stdout=PIPE)
        to_bam = popen(bam_cmd, stdin=bwa.stdout, stderr=stderr)
        # Allow bwa to receive a SIGPIPE if the second process exits.
        bwa.stdout.close()
        to_bam.communicate()
        # returncode is None until the process has been waited for, so
        # without this wait a bwa failure would never be noticed.
        bwa.wait()
        if bwa.returncode or to_bam.returncode:
            # The log has to be read before it is closed: closing a
            # temporary file removes it.
            with open(stderr.name) as log_fhand:
                raise RuntimeError(log_fhand.read())
    finally:
        stderr.close()
Ejemplo n.º 7
0
def map_with_bwamem(index_fpath, unpaired_fpath=None, paired_fpaths=None,
                    interleave_fpath=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None):
    '''It maps with the bwa mem algorithm.

    Exactly one kind of input has to be given: unpaired_fpath (a path),
    paired_fpaths (an iterable of paths) or interleave_fpath (a path).

    extra_params is an optional list of extra bwa arguments. The list is not
    modified. Any '-p' in it is ignored: '-p' is added only when
    interleave_fpath is given.

    readgroup is an optional dict that must have the ID, SM, PL and LB keys;
    it is passed to bwa with '-R'.

    The stderr of bwa goes to log_fpath or, if it is None, to a temporary
    file.

    It returns the process object created by popen, with its stdout set to
    PIPE. The function does not wait for the process.

    It raises a RuntimeError if no input is given or if more than one kind
    of input is given.
    '''
    interleave = interleave_fpath is not None
    num_called_fpaths = 0
    in_fpaths = []
    if unpaired_fpath is not None:
        num_called_fpaths += 1
        in_fpaths.append(unpaired_fpath)
    if paired_fpaths is not None:
        num_called_fpaths += 1
        in_fpaths.extend(paired_fpaths)
    if interleave:
        num_called_fpaths += 1
        in_fpaths.append(interleave_fpath)

    if num_called_fpaths == 0:
        raise RuntimeError('At least one file to map is required')
    if num_called_fpaths > 1:
        msg = 'Bwa can not map paired, unpaired or interleaved reads together'
        raise RuntimeError(msg)

    # A new list is built, so the list given by the caller is not modified,
    # and every '-p' in it is dropped: the flag depends only on the input.
    if extra_params is None:
        params = []
    else:
        params = [param for param in extra_params if param != '-p']

    if interleave:
        params.append('-p')

    if readgroup is not None:
        rg_str = '@RG\tID:{ID}\tSM:{SM}\tPL:{PL}\tLB:{LB}'.format(**readgroup)
        params.extend(['-R', rg_str])

    binary = get_binary_path('bwa')
    cmd = [binary, 'mem', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(params)
    cmd.extend(in_fpaths)

    # The log handle is left open on purpose: the returned process is still
    # running and writing to it when this function returns.
    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    return popen(cmd, stderr=stderr, stdout=PIPE)
Ejemplo n.º 8
0
def map_with_tophat(index_fpath, out_dir, unpaired_fpath=None,
                    paired_fpaths=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None, mate_inner_dist=None,
                    mate_std_dev=None):
    '''It maps with tophat and waits for it to finish.

    unpaired_fpath is a path and paired_fpaths an iterable of paths; they
    can not be given together. With paired reads mate_inner_dist and
    mate_std_dev are mandatory.

    extra_params is an optional list of extra tophat arguments. The list is
    not modified. Several standard parameters are added if they are not
    already in it, '-p' is added when threads is not None and '-o out_dir'
    is always added.

    readgroup is an optional dict, only the ID, LB, SM and PL keys are
    allowed.

    The stderr of tophat goes to log_fpath or, if it is None, to a temporary
    file. The return code of tophat is not checked.

    It raises a RuntimeError if paired and unpaired reads are mixed, if the
    mate distances are missing for paired reads or if readgroup has a tag
    that is not valid.
    '''
    if unpaired_fpath is not None and paired_fpaths is not None:
        msg = "Tophat devs don't recommend mixing paired and unpaired reads"
        raise RuntimeError(msg)

    # Work on a copy: the list given by the caller must not be modified.
    params = [] if extra_params is None else list(extra_params)

    standar_params = ['--b2-very-sensitive', '--no-discordant', '--no-mixed',
                      '--keep-fasta-order']
    for standar_param in standar_params:
        if standar_param not in params:
            params.append(standar_param)
    if threads is not None:
        params.extend(['-p', str(get_num_threads(threads))])

    if paired_fpaths:
        if mate_inner_dist is None or mate_std_dev is None:
            raise RuntimeError('with paires reads inner-dist is mandatory')
        params.extend(['-r', str(mate_inner_dist), '--mate-std-dev',
                       str(mate_std_dev)])

    params.extend(['-o', out_dir])
    if readgroup is not None:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            params.extend(['--rg-{}'.format(TOPHAT_RG_TRANSLATOR[key]),
                           value])

    cmd = ['tophat']
    cmd.extend(params)
    cmd.append(index_fpath)

    if paired_fpaths:
        cmd.extend(paired_fpaths)

    if unpaired_fpath:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        tophat = popen(cmd, stderr=stderr, stdout=PIPE)
        tophat.communicate()
    finally:
        # tophat is done (or could not be started): close the log.
        stderr.close()
Ejemplo n.º 9
0
def map_with_bowtie2(index_fpath, paired_fpaths=None,
                     unpaired_fpath=None, readgroup=None, threads=None,
                     log_fpath=None, preset='very-sensitive-local',
                     extra_params=None):
    '''It starts a bowtie2 mapping and returns the running process.

    unpaired_fpath is a path; paired_fpaths holds the two paths of the pair
    (its items 0 and 1 are used). At least one of them is required.

    readgroup is a dict with read group tags. It is only used when it has an
    'ID' key, and then only the ID, LB, SM and PL keys are allowed.

    preset is passed to bowtie2 as '--<preset>' and extra_params is an
    optional list of extra arguments.

    The stderr of bowtie2 goes to log_fpath or, if it is None, to a
    temporary file. The returned process has its stdout set to PIPE.

    It raises a RuntimeError if no input is given or if readgroup has a tag
    that is not valid.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')

    rg_tags = {} if readgroup is None else readgroup

    cmd = [get_binary_path('bowtie2'), '-x', index_fpath,
           '--{0}'.format(preset), '-p', str(get_num_threads(threads))]
    if extra_params is not None:
        cmd.extend(extra_params)

    if unpaired_fpath:
        cmd += ['-U', unpaired_fpath]
    if paired_fpaths:
        cmd += ['-1', paired_fpaths[0], '-2', paired_fpaths[1]]

    if 'ID' in rg_tags:
        for tag, value in rg_tags.items():
            if tag == 'ID':
                cmd += ['--rg-id', value]
            elif tag in ('LB', 'SM', 'PL'):
                cmd += ['--rg', '{0}:{1}'.format(tag, value)]
            else:
                msg = 'The readgroup header tag is not valid: {}'.format(tag)
                raise RuntimeError(msg)

    # the log stays open, the process is still running when we return
    if log_fpath is not None:
        log_fhand = open(log_fpath, 'w')
    else:
        log_fhand = NamedTemporaryFile(suffix='.stderr')

    return popen(cmd, stderr=log_fhand, stdout=PIPE)
Ejemplo n.º 10
0
def map_with_hisat2(index_fpath, paired_fpaths=None,
                    unpaired_fpath=None, readgroup=None, threads=None,
                    log_fhand=None, extra_params=None):
    '''It starts a hisat2 mapping and returns the running process.

    unpaired_fpath is a path; paired_fpaths holds the two paths of the pair
    (its items 0 and 1 are used). At least one of them is required.

    readgroup is a dict with read group tags. It is only used when it has an
    'ID' key, and then only the ID, LB, SM and PL keys are allowed.

    extra_params is an optional list of extra hisat2 arguments.

    log_fhand is given to popen as the stderr of hisat2. The returned
    process has its stdout set to PIPE.

    It raises a RuntimeError if no input is given or if readgroup has a tag
    that is not valid.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')

    rg_tags = {} if readgroup is None else readgroup

    cmd = [get_binary_path('hisat2'), '-x', index_fpath,
           '-p', str(get_num_threads(threads))]
    if extra_params is not None:
        cmd.extend(extra_params)

    if unpaired_fpath:
        cmd += ['-U', unpaired_fpath]
    if paired_fpaths:
        cmd += ['-1', paired_fpaths[0], '-2', paired_fpaths[1]]

    if 'ID' in rg_tags:
        for tag, value in rg_tags.items():
            if tag == 'ID':
                cmd += ['--rg-id', value]
            elif tag in ('LB', 'SM', 'PL'):
                cmd += ['--rg', '{0}:{1}'.format(tag, value)]
            else:
                msg = 'The readgroup header tag is not valid: {}'.format(tag)
                raise RuntimeError(msg)

    return popen(cmd, stderr=log_fhand, stdout=PIPE)
Ejemplo n.º 11
0
def map_with_bwasw(index_fpath,
                   bam_fpath,
                   unpaired_fpath=None,
                   paired_fpaths=None,
                   readgroup=None,
                   threads=None,
                   log_fpath=None,
                   extra_params=None):
    '''It maps with the bwa bwasw algorithm and writes a bam to bam_fpath.

    Exactly one of unpaired_fpath (a path) or paired_fpaths (an iterable of
    paths) has to be given. extra_params is an optional list of extra bwa
    arguments.

    The bwa output is piped to a second process. If readgroup is not empty
    that process is picard AddOrReplaceReadGroups (taken from the
    PICARD_TOOLS_DIR setting) and readgroup must have the ID, LB, PL, SM and
    PU keys. Otherwise the process is samtools view.

    The stderr of both processes goes to log_fpath or, if it is None, to a
    temporary file.

    It raises a RuntimeError if no input is given, if both kinds of input
    are given or, with the content of the log as message, if any of the two
    processes ends with a non-zero return code.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    elif paired_fpaths is not None and unpaired_fpath is not None:
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    bwa_cmd = [
        binary, 'bwasw', '-t', str(get_num_threads(threads)), index_fpath
    ]
    bwa_cmd.extend(extra_params)

    if paired_fpaths is not None:
        bwa_cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        bwa_cmd.append(unpaired_fpath)

    # The second command is prepared before bwa is started, so a missing
    # readgroup key or binary does not leave bwa running.
    if readgroup:
        # add readgroup using picard
        picard_tools = get_setting("PICARD_TOOLS_DIR")
        bam_cmd = [
            'java', '-jar',
            os.path.join(picard_tools, 'AddOrReplaceReadGroups.jar'),
            'INPUT=/dev/stdin',
            'OUTPUT={0}'.format(bam_fpath),
            'RGID={0}'.format(readgroup['ID']),
            'RGLB={0}'.format(readgroup['LB']),
            'RGPL={0}'.format(readgroup['PL']),
            'RGSM={0}'.format(readgroup['SM']),
            'RGPU={0}'.format(readgroup['PU']),
            'VALIDATION_STRINGENCY=LENIENT'
        ]
    else:
        bam_cmd = [
            get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-', '-o',
            bam_fpath
        ]

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bwa = popen(bwa_cmd, stderr=stderr, stdout=PIPE)
        to_bam = popen(bam_cmd, stdin=bwa.stdout, stderr=stderr)
        # Allow bwa to receive a SIGPIPE if the second process exits.
        bwa.stdout.close()
        to_bam.communicate()
        # returncode is None until the process has been waited for, so
        # without this wait a bwa failure would never be noticed.
        bwa.wait()
        if bwa.returncode or to_bam.returncode:
            # The log has to be read before it is closed: closing a
            # temporary file removes it.
            with open(stderr.name) as log_fhand:
                raise RuntimeError(log_fhand.read())
    finally:
        stderr.close()
Ejemplo n.º 12
0
def map_with_bowtie2(index_fpath,
                     bam_fpath,
                     paired_fpaths=None,
                     unpaired_fpaths=None,
                     readgroup=None,
                     threads=None,
                     log_fpath=None,
                     preset='very-sensitive-local',
                     extra_params=None):
    '''It maps with bowtie2 and writes the result to bam_fpath.

    paired_fpaths is a list of tuples, each tuple holds the two files of a
    pair. unpaired_fpaths is a list of files. At least one of them is
    required.

    readgroup is a dict with read group tags. It is only used when it has an
    'ID' key, and then only the ID, LB, SM and PL keys are allowed.

    preset is the name of the bowtie2 preset, it is passed as '--<preset>'.
    extra_params is an optional list of extra bowtie2 arguments.

    The bowtie2 output is piped to samtools view, that writes the bam. The
    stderr of both processes goes to log_fpath or, if it is None, to a
    temporary file.

    It raises a RuntimeError if no input file is given or if readgroup has a
    tag that is not valid.
    '''
    if readgroup is None:
        readgroup = {}

    if extra_params is None:
        extra_params = []

    if paired_fpaths is None and unpaired_fpaths is None:
        raise RuntimeError('At least one file to map is required')

    binary = get_binary_path('bowtie2')
    cmd = [
        binary, '-x', index_fpath, '--{0}'.format(preset), '-p',
        str(get_num_threads(threads))
    ]

    cmd.extend(extra_params)
    if unpaired_fpaths:
        cmd.extend(['-U', ','.join(unpaired_fpaths)])
    if paired_fpaths:
        plus = [pairs[0] for pairs in paired_fpaths]
        minus = [pairs[1] for pairs in paired_fpaths]
        cmd.extend(['-1', ','.join(plus), '-2', ','.join(minus)])

    if 'ID' in readgroup:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            if key == 'ID':
                cmd.extend(['--rg-id', value])
            else:
                cmd.extend(['--rg', '{0}:{1}'.format(key, value)])

    # Both commands are prepared before any process is started, so a failure
    # while looking for samtools does not leave bowtie2 running.
    samtools_cmd = [
        get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-', '-o',
        bam_fpath
    ]

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bowtie2 = popen(cmd, stderr=stderr, stdout=PIPE)
        samtools = popen(samtools_cmd, stdin=bowtie2.stdout, stderr=stderr)
        # Allow bowtie2 to receive a SIGPIPE if samtools exits.
        bowtie2.stdout.close()
        samtools.communicate()
        # samtools is done, so bowtie2 has nothing left to write to: wait
        # for it so the process is collected.
        bowtie2.wait()
    finally:
        # The log is not needed once both processes are done.
        stderr.close()