def index_reference(fasta_file, logger):
    """Run ``bowtie2-build`` on a reference FASTA file and log the outcome.

    The FASTA path is handed to ``bowtie2-build`` twice: once as the input
    and once more as the second positional argument (per the bowtie2-build
    usage, the index base name), so the index files are named after the
    FASTA file itself.

    Args:
        fasta_file: Path to the reference FASTA file.
        logger: Logger object forwarded to the ``gf_utils`` helpers.
    """
    completion_message = "bowtie2: reference indexing successfully completed"

    gf_utils.info_header(logger, "index reference file using bowtie2")

    build_command = ['bowtie2-build', '-f', fasta_file, fasta_file]
    builder = subprocess.Popen(
        build_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # NOTE(review): wait() with piped stdout/stderr can block if the child
    # fills a pipe buffer; left as-is because gf_utils.log_process receives
    # the process object afterwards and how it reads the pipes is not
    # visible here -- confirm before changing.
    builder.wait()

    # NOTE(review): the completion text is also what gets passed as
    # ``log_error_to``; other call sites pass "info" -- confirm intent.
    gf_utils.log_process(logger, builder, log_error_to=completion_message)
    gf_utils.info_header(logger, completion_message)
def run_samtools_bam(path_to_tmp_file, fasta_file, prefix, logger):
    """Convert, sort, index and pile up the modified SAM file with samtools.

    Working from ``<path_to_tmp_file>/<prefix>.sam.mod`` the function runs,
    in order:

    1. ``samtools view``  -> ``<prefix>.bam`` (``-q 1`` filter applied)
    2. ``samtools sort``  -> ``<prefix>.sorted`` output prefix
    3. ``samtools index`` on ``<prefix>.sorted.bam``
    4. ``samtools mpileup`` -> ``<prefix>.pileup``

    Args:
        path_to_tmp_file: Directory holding the input and all outputs.
        fasta_file: Reference FASTA passed to ``samtools mpileup -f``.
        prefix: Sample prefix used to build every file name.
        logger: Logger object forwarded to the ``gf_utils`` helpers.
    """
    base_path = path_to_tmp_file + "/" + prefix
    sam_output = base_path + ".sam"
    bam_output = base_path + ".bam"
    bam_sorted_output = base_path + ".sorted"
    # The sort step is given an output *prefix*; the later steps read the
    # file with ".bam" appended to that prefix.
    bam_sorted_index_output = bam_sorted_output + ".bam"
    pileup_output = base_path + '.pileup'

    # The first three steps share the same run / wait / log pattern.
    steps = [
        ('converting .sam to .bam', [
            'samtools', 'view', '-b', '-o', bam_output, '-q', '1', '-S',
            sam_output + '.mod'
        ]),
        ('samtools sorting .bam file',
         ['samtools', 'sort', bam_output, bam_sorted_output]),
        ('samtools indexing .bam file',
         ['samtools', 'index', bam_sorted_index_output]),
    ]
    for header, command in steps:
        gf_utils.info_header(logger, header)
        process = subprocess.Popen(command,
                                   stderr=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        process.wait()
        gf_utils.log_process(logger, process, log_error_to="info")

    # Build the mpileup command once so the logged text and the executed
    # command cannot drift apart.
    mpileup_command = [
        'samtools', 'mpileup', '-A', '-B', '-f', fasta_file,
        bam_sorted_index_output
    ]
    gf_utils.info_header(logger, 'generating mpileup file')
    gf_utils.info_header(logger, ' '.join(map(str, mpileup_command)))

    # Binary mode: the piped stdout yields bytes on Python 3, which a
    # text-mode file would reject with TypeError. On Python 2 this is
    # equivalent to the previous text-mode write.
    with open(pileup_output, 'wb') as sam_pileup:
        process = subprocess.Popen(mpileup_command,
                                   stderr=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        for chunk in process.stdout:
            sam_pileup.write(chunk)
        process.wait()
    gf_utils.log_process(logger,
                         process,
                         log_error_to="couldn't generate mpileup file")
def samtools_faidx(fasta_file, logger):
    """Run ``samtools faidx`` on a FASTA file and verify the ``.fai`` output.

    After the command finishes, ``<fasta_file>.fai`` must exist and be
    non-empty; otherwise a message is logged and the interpreter exits with
    status 1.

    Args:
        fasta_file: Path to the reference FASTA file.
        logger: Logger object forwarded to the ``gf_utils`` helpers.

    Raises:
        SystemExit: If the ``.fai`` file is missing or empty.
    """
    process = subprocess.Popen(['samtools', 'faidx', fasta_file],
                               stderr=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    process.wait()
    gf_utils.log_process(logger, process, log_error_to="info")

    fai_index = fasta_file + '.fai'
    try:
        index_size = os.path.getsize(fai_index)
    except OSError:
        # No index file at all (e.g. samtools failed outright): handle it
        # like an empty index so the failure is logged and exits cleanly
        # instead of surfacing as an uncaught OSError.
        index_size = 0

    if index_size > 0:
        gf_utils.info_header(logger, 'samtootls faidx sucsessfully created')
    else:
        gf_utils.info_header(
            logger,
            'samtootls faidx output is empty, check reference fasta format')
        sys.exit(1)
def modify_bowtie_sam(path_sam_folder, prefix, logger):
    """Copy a SAM file to ``<name>.sam.mod`` with the secondary bit cleared.

    Header lines (starting with ``@``) are copied verbatim. For every other
    line the FLAG column (second tab-separated field) has bit 0x100
    ("secondary alignment") cleared; all other flag bits and all other
    columns are left untouched.

    Args:
        path_sam_folder: Directory containing ``<prefix>.sam``.
        prefix: Sample identifier used as the SAM file stem.
        logger: Logger object forwarded to ``gf_utils.info_header``.

    Raises:
        ValueError: If a record's FLAG column is not an integer.
    """
    secondary_bit = 0x100  # 256 in the SAM FLAG bit field
    path_sam_file = path_sam_folder + "/" + prefix + ".sam"

    with open(path_sam_file) as sam, \
            open(path_sam_file + '.mod', 'w') as sam_mod:
        for line in sam:
            if line.startswith('@'):
                sam_mod.write(line)
                continue

            fields = line.split('\t')
            if len(fields) < 2:
                # Blank or truncated line: nothing to patch, pass it through.
                sam_mod.write(line)
                continue

            # Clear the bit with a mask rather than subtracting 256, so that
            # a flag of exactly 256 is handled and flags above 256 that do
            # not carry the secondary bit (e.g. 1024) are not corrupted.
            fields[1] = str(int(fields[1]) & ~secondary_bit)
            sam_mod.write('\t'.join(fields))

    gf_utils.info_header(
        logger, 'SAM file successfully mofified to unset secondary alignment')
def run_bowtie_on_indices(fasta_file, forward_fastq, reverse_fastq, outdir,
                          workflow_name, version, prefix, bowtie_options,
                          logger):
    """Align paired-end reads with bowtie2 and write ``<prefix>.sam``.

    The SAM file is written to ``<outdir>/tmp/<prefix>.sam``. The outcome is
    logged through ``gf_utils.info_header``; a non-zero exit status is
    reported together with bowtie2's stderr but does not raise.

    Args:
        fasta_file: Value passed to bowtie2 ``-x``.
        forward_fastq: Forward reads file (``-1``).
        reverse_fastq: Reverse reads file (``-2``).
        outdir: Output directory; the SAM file goes into its ``tmp``
            sub-directory.
        workflow_name: Unused by this function.
        version: Unused by this function.
        prefix: Sample prefix used as the SAM file stem.
        bowtie_options: List of extra bowtie2 option strings.
        logger: Logger object forwarded to ``gf_utils.info_header``.
    """
    # Function-scope import so the file's import block need not change.
    import shlex

    sam_output = outdir + "/tmp/" + prefix + '.sam'
    gf_utils.info_header(logger, 'running bowtie command')

    # Tokenise the options so each flag/value reaches bowtie2 as its own
    # argv entry (and an empty option list adds nothing), instead of one
    # space-joined argument.
    extra_arguments = shlex.split(' '.join(bowtie_options))
    command = [
        'bowtie2', '-1', forward_fastq, '-2', reverse_fastq, '-x', fasta_file,
        '-S', sam_output
    ] + extra_arguments

    process = subprocess.Popen(command,
                               stderr=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    # communicate() drains both pipes while waiting, so the child cannot
    # block on a full pipe and its diagnostics are not lost.
    _, stderr_data = process.communicate()

    if process.returncode == 0:
        gf_utils.info_header(logger, 'SAM file successfully created')
    else:
        if not isinstance(stderr_data, str):
            stderr_data = stderr_data.decode('utf-8', 'replace')
        gf_utils.info_header(
            logger, 'bowtie2 failed with exit code {0}: {1}'.format(
                process.returncode, stderr_data.strip()))