def _realign_bam(bam_fpath, reference_fpath, out_bam_fpath, threads=False):
    '''It realigns the bam using GATK local realignment around indels.

    Before running GATK the reference sam index, the reference picard dict
    and the bam index are created with the corresponding helpers.

    GATK (the jar given by the GATK_JAR setting) is run twice:
      - RealignerTargetCreator writes the intervals into a temporary file.
      - IndelRealigner uses those intervals and writes out_bam_fpath.

    threads: when it is truthy and greater than 1, '-nt' is added to the
    IndelRealigner command with the value returned by get_num_threads.

    Whatever check_call raises when a GATK step fails is propagated to the
    caller. The temporary files are closed in any case.
    '''
    _create_sam_reference_index(reference_fpath)
    _create_picard_dict(reference_fpath)
    index_bam(bam_fpath)

    gatk_jar = get_setting('GATK_JAR')
    intervals_fhand = NamedTemporaryFile(suffix='.intervals')
    stderr = NamedTemporaryFile(suffix='picard.stderr')
    stdout = NamedTemporaryFile(suffix='picard.stdout')
    try:
        # the intervals to realign
        cmd = ['java', '-jar', gatk_jar, '-T', 'RealignerTargetCreator',
               '-I', bam_fpath, '-R', reference_fpath,
               '-o', intervals_fhand.name]
        check_call(cmd, stderr=stderr, stdout=stdout)

        # the realignment itself
        cmd = ['java', '-jar', gatk_jar, '-I', bam_fpath,
               '-R', reference_fpath, '-T', 'IndelRealigner',
               '-targetIntervals', intervals_fhand.name,
               '-o', out_bam_fpath]
        if threads and threads > 1:
            cmd.extend(['-nt', str(get_num_threads(threads))])
        check_call(cmd, stderr=stderr, stdout=stdout)
    finally:
        # Closing also removes the temporary files, even if GATK failed.
        intervals_fhand.close()
        stderr.close()
        stdout.close()
def _realign_bam(bam_fpath, reference_fpath, out_bam_fpath, threads=False):
    '''It realigns the bam using GATK local realignment around indels.

    The reference sam index, the reference picard dict and the bam index
    are created first with the corresponding helpers. Then the GATK jar
    given by the GATK_JAR setting is run twice: RealignerTargetCreator
    writes the intervals to realign into a temporary file and
    IndelRealigner uses them to write out_bam_fpath.

    threads: when it is truthy and greater than 1, '-nt' is added to the
    IndelRealigner command with the value returned by get_num_threads.

    Whatever check_call raises when a GATK step fails is propagated to the
    caller. The temporary files are always closed on exit.
    '''
    _create_sam_reference_index(reference_fpath)
    _create_picard_dict(reference_fpath)
    index_bam(bam_fpath)

    gatk_jar = get_setting('GATK_JAR')

    # The with statement closes (and removes) the three temporary files even
    # when one of the GATK runs fails.
    with NamedTemporaryFile(suffix='.intervals') as intervals_fhand, \
            NamedTemporaryFile(suffix='picard.stderr') as stderr, \
            NamedTemporaryFile(suffix='picard.stdout') as stdout:
        # the intervals to realign
        cmd = [
            'java', '-jar', gatk_jar, '-T', 'RealignerTargetCreator',
            '-I', bam_fpath, '-R', reference_fpath,
            '-o', intervals_fhand.name
        ]
        check_call(cmd, stderr=stderr, stdout=stdout)

        # the realignment itself
        cmd = [
            'java', '-jar', gatk_jar, '-I', bam_fpath,
            '-R', reference_fpath, '-T', 'IndelRealigner',
            '-targetIntervals', intervals_fhand.name,
            '-o', out_bam_fpath
        ]
        if threads and threads > 1:
            cmd.extend(['-nt', str(get_num_threads(threads))])
        check_call(cmd, stderr=stderr, stdout=stdout)
def map_with_bwamem(index_fpath, unpaired_fpath=None, paired_fpaths=None,
                    threads=None, log_fpath=None, extra_params=None):
    '''It maps with the bwa mem algorithm.

    Exactly one of unpaired_fpath (a single file) or paired_fpaths (an
    iterable of files that is appended to the command) has to be given,
    otherwise a RuntimeError is raised.

    extra_params is a list of extra command line arguments placed after the
    index path. The stderr of bwa goes to log_fpath, or to a temporary file
    when log_fpath is None.

    It returns the process object created by popen, with its stdout
    connected to a pipe. The function does not wait for bwa to finish.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    elif paired_fpaths is not None and unpaired_fpath is not None:
        # The old message read "unpaired and unpaired".
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'mem', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)
    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    bwa = popen(cmd, stderr=stderr, stdout=PIPE)
    return bwa
def map_with_bowtie2(index_fpath, bam_fpath, paired_fpaths=None,
                     unpaired_fpaths=None, readgroup=None, threads=None,
                     log_fpath=None, preset='very-sensitive-local',
                     extra_params=None):
    '''It maps with bowtie2 and writes the result into bam_fpath.

    paired_fpaths is a list of pairs of files: the first items of the pairs
    are comma joined for '-1' and the second ones for '-2'.
    unpaired_fpaths is a list of files that are comma joined for '-U'.
    At least one of them is required, otherwise a RuntimeError is raised.

    readgroup is a dict. It is only used when it has an 'ID' key; in that
    case every key has to be one of ID, LB, SM or PL, otherwise a
    RuntimeError is raised.

    preset is passed to bowtie2 as '--<preset>' and extra_params is a list
    of extra command line arguments.

    The output of bowtie2 is piped into 'samtools view' that writes
    bam_fpath. The stderr of both programs goes to log_fpath, or to a
    temporary file when log_fpath is None. The function waits for both
    processes to finish. Their return codes are not checked.
    '''
    if readgroup is None:
        readgroup = {}
    if extra_params is None:
        extra_params = []

    if paired_fpaths is None and unpaired_fpaths is None:
        raise RuntimeError('At least one file to map is required')

    binary = get_binary_path('bowtie2')
    cmd = [binary, '-x', index_fpath, '--{0}'.format(preset),
           '-p', str(get_num_threads(threads))]
    cmd.extend(extra_params)

    if unpaired_fpaths:
        cmd.extend(['-U', ','.join(unpaired_fpaths)])
    if paired_fpaths:
        plus = [pairs[0] for pairs in paired_fpaths]
        minus = [pairs[1] for pairs in paired_fpaths]
        cmd.extend(['-1', ','.join(plus), '-2', ','.join(minus)])

    if 'ID' in readgroup:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            if key == 'ID':
                cmd.extend(['--rg-id', value])
            else:
                cmd.extend(['--rg', '{0}:{1}'.format(key, value)])

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bowtie2 = popen(cmd, stderr=stderr, stdout=PIPE)
        cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
               '-o', bam_fpath]
        samtools = popen(cmd, stdin=bowtie2.stdout, stderr=stderr)
        # Allow bowtie2 to receive a SIGPIPE if samtools exits.
        bowtie2.stdout.close()
        samtools.communicate()
        # Reap the bowtie2 process so it is not left as a zombie.
        bowtie2.wait()
    finally:
        # The log handle is not needed once the children are done.
        stderr.close()
def map_with_tophat(index_fpath, out_dir, unpaired_fpath=None,
                    paired_fpaths=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None, mate_inner_dist=None,
                    mate_std_dev=None):
    '''It maps with tophat and leaves the result in out_dir.

    unpaired_fpath (a single file) and paired_fpaths (an iterable of files
    appended to the command) can not be given together; a RuntimeError is
    raised in that case. With paired reads mate_inner_dist and mate_std_dev
    are mandatory, otherwise a RuntimeError is raised.

    extra_params is a list of extra command line arguments. The list given
    by the caller is not modified. The options '--b2-very-sensitive',
    '--no-discordant', '--no-mixed' and '--keep-fasta-order' are added when
    they are not already in it. '-p' is only added when threads is not
    None.

    readgroup is a dict whose keys have to be ID, LB, SM or PL, otherwise a
    RuntimeError is raised. Every key is translated with
    TOPHAT_RG_TRANSLATOR to build the '--rg-...' option.

    The stderr of tophat goes to log_fpath, or to a temporary file when
    log_fpath is None. The function waits for tophat to finish and, when
    the return code is not zero, it writes a message to sys.stderr. No
    exception is raised in that case.
    '''
    if unpaired_fpath is not None and paired_fpaths is not None:
        msg = "Tophat devs don't recommend mixing paired and unpaired reads"
        raise RuntimeError(msg)

    # Work on a copy: the options added below must not leak into the list
    # owned by the caller.
    extra_params = [] if extra_params is None else list(extra_params)

    standar_params = ['--b2-very-sensitive', '--no-discordant', '--no-mixed',
                      '--keep-fasta-order']
    for standar_param in standar_params:
        if standar_param not in extra_params:
            extra_params.append(standar_param)

    if threads is not None:
        extra_params.extend(['-p', str(get_num_threads(threads))])

    if paired_fpaths:
        if mate_inner_dist is None or mate_std_dev is None:
            raise RuntimeError('with paires reads inner-dist is mandatory')
        extra_params.extend(['-r', str(mate_inner_dist),
                             '--mate-std-dev', str(mate_std_dev)])

    extra_params.extend(['-o', out_dir])

    if readgroup is not None:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            extra_params.extend(['--rg-{}'.format(TOPHAT_RG_TRANSLATOR[key]),
                                 value])

    cmd = ['tophat']
    cmd.extend(extra_params)
    cmd.append(index_fpath)
    if paired_fpaths:
        cmd.extend(paired_fpaths)
    if unpaired_fpath:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        tophat = popen(cmd, stderr=stderr, stdout=PIPE)
        tophat.communicate()
    finally:
        stderr.close()

    if tophat.returncode:
        sys.stderr.write('Error in tophat process\n')
def map_with_bwasw(index_fpath, bam_fpath, unpaired_fpath=None,
                   paired_fpaths=None, readgroup=None, threads=None,
                   log_fpath=None, extra_params=None):
    '''It maps with the bwa bwasw algorithm and writes bam_fpath.

    Exactly one of unpaired_fpath (a single file) or paired_fpaths (an
    iterable of files appended to the command) has to be given, otherwise a
    RuntimeError is raised.

    extra_params is a list of extra command line arguments placed after the
    index path.

    readgroup is a dict. When it is not empty the output of bwa is piped
    into picard AddOrReplaceReadGroups (looked for in the PICARD_TOOLS_DIR
    setting) and the ID, LB, PL, SM and PU keys are all read from it. When
    it is empty the output of bwa is piped into 'samtools view'.

    The stderr of both processes goes to log_fpath, or to a temporary file
    when log_fpath is None. The function waits for both processes and
    raises a RuntimeError with the content of that stderr file when any of
    them ends with a non zero return code.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    elif paired_fpaths is not None and unpaired_fpath is not None:
        # The old message read "unpaired and unpaired".
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if readgroup is None:
        readgroup = {}
    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'bwasw', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)
    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bwa = popen(cmd, stderr=stderr, stdout=PIPE)

        # add readgroup using picard
        picard_tools = get_setting("PICARD_TOOLS_DIR")
        if readgroup:
            cmd = ['java', '-jar',
                   os.path.join(picard_tools, 'AddOrReplaceReadGroups.jar'),
                   'INPUT=/dev/stdin',
                   'OUTPUT={0}'.format(bam_fpath),
                   'RGID={0}'.format(readgroup['ID']),
                   'RGLB={0}'.format(readgroup['LB']),
                   'RGPL={0}'.format(readgroup['PL']),
                   'RGSM={0}'.format(readgroup['SM']),
                   'RGPU={0}'.format(readgroup['PU']),
                   'VALIDATION_STRINGENCY=LENIENT']
        else:
            cmd = [get_binary_path('samtools'), 'view', '-h', '-b', '-S',
                   '-', '-o', bam_fpath]

        samtools = popen(cmd, stdin=bwa.stdout, stderr=stderr)
        # Allow bwa to receive a SIGPIPE if the second process exits.
        bwa.stdout.close()
        samtools.communicate()
        # Without waiting, bwa.returncode would still be None and a failed
        # bwa run would go unnoticed.
        bwa.wait()

        if bwa.returncode or samtools.returncode:
            with open(stderr.name) as log_fhand:
                raise RuntimeError(log_fhand.read())
    finally:
        stderr.close()
def map_with_bwamem(index_fpath, unpaired_fpath=None, paired_fpaths=None,
                    interleave_fpath=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None):
    '''It maps with the bwa mem algorithm.

    Exactly one of unpaired_fpath (a single file), paired_fpaths (an
    iterable of files appended to the command) or interleave_fpath (a
    single file) has to be given, otherwise a RuntimeError is raised.

    extra_params is a list of extra command line arguments. The list given
    by the caller is not modified. One '-p' found in it is dropped and '-p'
    is added back only when interleave_fpath is used.

    readgroup, when given, is a dict with the ID, SM, PL and LB keys; it is
    used to build the '@RG' line passed with '-R'.

    The stderr of bwa goes to log_fpath, or to a temporary file when
    log_fpath is None.

    It returns the process object created by popen, with its stdout
    connected to a pipe. The function does not wait for bwa to finish.
    '''
    interleave = False
    num_called_fpaths = 0
    in_fpaths = []
    if unpaired_fpath is not None:
        num_called_fpaths += 1
        in_fpaths.append(unpaired_fpath)
    if paired_fpaths is not None:
        num_called_fpaths += 1
        in_fpaths.extend(paired_fpaths)
    if interleave_fpath is not None:
        num_called_fpaths += 1
        in_fpaths.append(interleave_fpath)
        interleave = True

    if num_called_fpaths == 0:
        raise RuntimeError('At least one file to map is required')
    if num_called_fpaths > 1:
        # The old message read "unpaired and unpaired".
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    # Work on a copy: the changes below must not leak into the list owned by
    # the caller.
    extra_params = [] if extra_params is None else list(extra_params)

    if '-p' in extra_params:
        extra_params.remove('-p')
    if interleave:
        extra_params.append('-p')

    if readgroup is not None:
        rg_str = '@RG\tID:{ID}\tSM:{SM}\tPL:{PL}\tLB:{LB}'.format(**readgroup)
        extra_params.extend(['-R', rg_str])

    binary = get_binary_path('bwa')
    cmd = [binary, 'mem', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)
    cmd.extend(in_fpaths)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    bwa = popen(cmd, stderr=stderr, stdout=PIPE)
    return bwa
def map_with_tophat(index_fpath, out_dir, unpaired_fpath=None,
                    paired_fpaths=None, threads=None, log_fpath=None,
                    extra_params=None, readgroup=None, mate_inner_dist=None,
                    mate_std_dev=None):
    '''It maps with tophat and leaves the result in out_dir.

    unpaired_fpath (a single file) and paired_fpaths (an iterable of files
    appended to the command) can not be given together; a RuntimeError is
    raised in that case. With paired reads mate_inner_dist and mate_std_dev
    are mandatory, otherwise a RuntimeError is raised.

    extra_params is a list of extra command line arguments. The list given
    by the caller is not modified. The options '--b2-very-sensitive',
    '--no-discordant', '--no-mixed' and '--keep-fasta-order' are added when
    they are not already in it. '-p' is only added when threads is not
    None.

    readgroup is a dict whose keys have to be ID, LB, SM or PL, otherwise a
    RuntimeError is raised. Every key is translated with
    TOPHAT_RG_TRANSLATOR to build the '--rg-...' option.

    The stderr of tophat goes to log_fpath, or to a temporary file when
    log_fpath is None. The function waits for tophat to finish. The return
    code of tophat is not checked.
    '''
    if unpaired_fpath is not None and paired_fpaths is not None:
        msg = "Tophat devs don't recommend mixing paired and unpaired reads"
        raise RuntimeError(msg)

    # Work on a copy: the options added below must not leak into the list
    # owned by the caller.
    if extra_params is None:
        extra_params = []
    else:
        extra_params = list(extra_params)

    standar_params = ['--b2-very-sensitive', '--no-discordant', '--no-mixed',
                      '--keep-fasta-order']
    for standar_param in standar_params:
        if standar_param not in extra_params:
            extra_params.append(standar_param)

    if threads is not None:
        extra_params.extend(['-p', str(get_num_threads(threads))])

    if paired_fpaths:
        if mate_inner_dist is None or mate_std_dev is None:
            raise RuntimeError('with paires reads inner-dist is mandatory')
        extra_params.extend(['-r', str(mate_inner_dist),
                             '--mate-std-dev', str(mate_std_dev)])

    extra_params.extend(['-o', out_dir])

    if readgroup is not None:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            extra_params.extend(['--rg-{}'.format(TOPHAT_RG_TRANSLATOR[key]),
                                 value])

    cmd = ['tophat']
    cmd.extend(extra_params)
    cmd.append(index_fpath)
    if paired_fpaths:
        cmd.extend(paired_fpaths)
    if unpaired_fpath:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        tophat = popen(cmd, stderr=stderr, stdout=PIPE)
        tophat.communicate()
    finally:
        # The log handle is not needed once tophat is done.
        stderr.close()
def map_with_bowtie2(index_fpath, paired_fpaths=None, unpaired_fpath=None,
                     readgroup=None, threads=None, log_fpath=None,
                     preset='very-sensitive-local', extra_params=None):
    '''It launches bowtie2 and returns the running process.

    paired_fpaths is indexed as a pair: item 0 goes to '-1' and item 1 to
    '-2'. unpaired_fpath is a single file passed with '-U'. A RuntimeError
    is raised when both are None.

    readgroup is a dict that is only used when it has an 'ID' key; in that
    case every key has to be one of ID, LB, SM or PL, otherwise a
    RuntimeError is raised.

    preset is passed as '--<preset>' and extra_params is a list of extra
    command line arguments. The stderr of bowtie2 goes to log_fpath, or to
    a temporary file when log_fpath is None.

    The returned object is whatever popen returns, with its stdout
    connected to a pipe. The function does not wait for bowtie2.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')

    rg_tags = {} if readgroup is None else readgroup
    user_args = [] if extra_params is None else extra_params

    bowtie2_bin = get_binary_path('bowtie2')
    num_threads = str(get_num_threads(threads))
    cmd = [bowtie2_bin, '-x', index_fpath, '--{0}'.format(preset)]
    cmd += ['-p', num_threads]
    cmd += user_args

    if unpaired_fpath:
        cmd += ['-U', unpaired_fpath]
    if paired_fpaths:
        cmd += ['-1', paired_fpaths[0], '-2', paired_fpaths[1]]

    if 'ID' in rg_tags:
        valid_tags = ('ID', 'LB', 'SM', 'PL')
        for tag, tag_value in rg_tags.items():
            if tag not in valid_tags:
                raise RuntimeError(
                    'The readgroup header tag is not valid: {}'.format(tag))
            if tag == 'ID':
                rg_args = ['--rg-id', tag_value]
            else:
                rg_args = ['--rg', '{0}:{1}'.format(tag, tag_value)]
            cmd += rg_args

    if log_fpath is not None:
        log_fhand = open(log_fpath, 'w')
    else:
        log_fhand = NamedTemporaryFile(suffix='.stderr')

    return popen(cmd, stderr=log_fhand, stdout=PIPE)
def map_with_hisat2(index_fpath, paired_fpaths=None, unpaired_fpath=None,
                    readgroup=None, threads=None, log_fhand=None,
                    extra_params=None):
    '''It launches hisat2 and returns the running process.

    paired_fpaths is indexed as a pair: item 0 goes to '-1' and item 1 to
    '-2'. unpaired_fpath is a single file passed with '-U'. A RuntimeError
    is raised when both are None.

    readgroup is a dict that is only used when it has an 'ID' key; in that
    case every key has to be one of ID, LB, SM or PL, otherwise a
    RuntimeError is raised.

    extra_params is a list of extra command line arguments. log_fhand is
    handed to popen as the stderr of hisat2 as it is, None included.

    The returned object is whatever popen returns, with its stdout
    connected to a pipe. The function does not wait for hisat2.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')

    rg_tags = {} if readgroup is None else readgroup
    user_args = [] if extra_params is None else extra_params

    hisat2_bin = get_binary_path('hisat2')
    num_threads = str(get_num_threads(threads))
    cmd = [hisat2_bin, '-x', index_fpath, '-p', num_threads]
    cmd += user_args

    if unpaired_fpath:
        cmd += ['-U', unpaired_fpath]
    if paired_fpaths:
        cmd += ['-1', paired_fpaths[0], '-2', paired_fpaths[1]]

    if 'ID' in rg_tags:
        valid_tags = ('ID', 'LB', 'SM', 'PL')
        for tag, tag_value in rg_tags.items():
            if tag not in valid_tags:
                raise RuntimeError(
                    'The readgroup header tag is not valid: {}'.format(tag))
            if tag == 'ID':
                rg_args = ['--rg-id', tag_value]
            else:
                rg_args = ['--rg', '{0}:{1}'.format(tag, tag_value)]
            cmd += rg_args

    return popen(cmd, stderr=log_fhand, stdout=PIPE)
def map_with_bwasw(index_fpath, bam_fpath, unpaired_fpath=None,
                   paired_fpaths=None, readgroup=None, threads=None,
                   log_fpath=None, extra_params=None):
    '''It maps with the bwa bwasw algorithm and writes bam_fpath.

    Exactly one of unpaired_fpath (a single file) or paired_fpaths (an
    iterable of files appended to the command) has to be given, otherwise a
    RuntimeError is raised.

    extra_params is a list of extra command line arguments placed after the
    index path.

    readgroup is a dict. When it is not empty the output of bwa is piped
    into picard AddOrReplaceReadGroups (looked for in the PICARD_TOOLS_DIR
    setting) and the ID, LB, PL, SM and PU keys are all read from it. When
    it is empty the output of bwa is piped into 'samtools view'.

    The stderr of both processes goes to log_fpath, or to a temporary file
    when log_fpath is None. The function waits for both processes and
    raises a RuntimeError with the content of that stderr file when any of
    them ends with a non zero return code.
    '''
    if paired_fpaths is None and unpaired_fpath is None:
        raise RuntimeError('At least one file to map is required')
    elif paired_fpaths is not None and unpaired_fpath is not None:
        # The old message read "unpaired and unpaired".
        msg = 'Bwa can not map paired and unpaired reads together'
        raise RuntimeError(msg)

    if readgroup is None:
        readgroup = {}
    if extra_params is None:
        extra_params = []

    binary = get_binary_path('bwa')
    cmd = [binary, 'bwasw', '-t', str(get_num_threads(threads)), index_fpath]
    cmd.extend(extra_params)
    if paired_fpaths is not None:
        cmd.extend(paired_fpaths)
    if unpaired_fpath is not None:
        cmd.append(unpaired_fpath)

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bwa = popen(cmd, stderr=stderr, stdout=PIPE)

        # add readgroup using picard
        picard_tools = get_setting("PICARD_TOOLS_DIR")
        if readgroup:
            cmd = [
                'java', '-jar',
                os.path.join(picard_tools, 'AddOrReplaceReadGroups.jar'),
                'INPUT=/dev/stdin',
                'OUTPUT={0}'.format(bam_fpath),
                'RGID={0}'.format(readgroup['ID']),
                'RGLB={0}'.format(readgroup['LB']),
                'RGPL={0}'.format(readgroup['PL']),
                'RGSM={0}'.format(readgroup['SM']),
                'RGPU={0}'.format(readgroup['PU']),
                'VALIDATION_STRINGENCY=LENIENT'
            ]
        else:
            cmd = [
                get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
                '-o', bam_fpath
            ]

        samtools = popen(cmd, stdin=bwa.stdout, stderr=stderr)
        # Allow bwa to receive a SIGPIPE if the second process exits.
        bwa.stdout.close()
        samtools.communicate()
        # Without waiting, bwa.returncode would still be None and a failed
        # bwa run would go unnoticed.
        bwa.wait()

        if bwa.returncode or samtools.returncode:
            with open(stderr.name) as log_fhand:
                raise RuntimeError(log_fhand.read())
    finally:
        stderr.close()
def map_with_bowtie2(index_fpath, bam_fpath, paired_fpaths=None,
                     unpaired_fpaths=None, readgroup=None, threads=None,
                     log_fpath=None, preset='very-sensitive-local',
                     extra_params=None):
    '''It maps with bowtie2 and writes the result into bam_fpath.

    paired_fpaths is a list of pairs of files: the first items of the pairs
    are comma joined for '-1' and the second ones for '-2'.
    unpaired_fpaths is a list of files that are comma joined for '-U'.
    At least one of them is required, otherwise a RuntimeError is raised.

    readgroup is a dict. It is only used when it has an 'ID' key; in that
    case every key has to be one of ID, LB, SM or PL, otherwise a
    RuntimeError is raised.

    preset is passed to bowtie2 as '--<preset>' and extra_params is a list
    of extra command line arguments.

    The output of bowtie2 is piped into 'samtools view' that writes
    bam_fpath. The stderr of both programs goes to log_fpath, or to a
    temporary file when log_fpath is None. The function waits for both
    processes to finish. Their return codes are not checked.
    '''
    if readgroup is None:
        readgroup = {}
    if extra_params is None:
        extra_params = []

    if paired_fpaths is None and unpaired_fpaths is None:
        raise RuntimeError('At least one file to map is required')

    binary = get_binary_path('bowtie2')
    cmd = [
        binary, '-x', index_fpath, '--{0}'.format(preset),
        '-p', str(get_num_threads(threads))
    ]
    cmd.extend(extra_params)

    if unpaired_fpaths:
        cmd.extend(['-U', ','.join(unpaired_fpaths)])
    if paired_fpaths:
        plus = [pairs[0] for pairs in paired_fpaths]
        minus = [pairs[1] for pairs in paired_fpaths]
        cmd.extend(['-1', ','.join(plus), '-2', ','.join(minus)])

    if 'ID' in readgroup:
        for key, value in readgroup.items():
            if key not in ('ID', 'LB', 'SM', 'PL'):
                msg = 'The readgroup header tag is not valid: {}'.format(key)
                raise RuntimeError(msg)
            if key == 'ID':
                cmd.extend(['--rg-id', value])
            else:
                cmd.extend(['--rg', '{0}:{1}'.format(key, value)])

    if log_fpath is None:
        stderr = NamedTemporaryFile(suffix='.stderr')
    else:
        stderr = open(log_fpath, 'w')

    try:
        bowtie2 = popen(cmd, stderr=stderr, stdout=PIPE)
        cmd = [
            get_binary_path('samtools'), 'view', '-h', '-b', '-S', '-',
            '-o', bam_fpath
        ]
        samtools = popen(cmd, stdin=bowtie2.stdout, stderr=stderr)
        # Allow bowtie2 to receive a SIGPIPE if samtools exits.
        bowtie2.stdout.close()
        samtools.communicate()
        # Reap the bowtie2 process so it is not left as a zombie.
        bowtie2.wait()
    finally:
        # The log handle is not needed once the children are done.
        stderr.close()