def test_rev_compl_fragmented_reads(self):
    """Run the bowtie2 -> BAM pipeline on forward/reverse oriented reads.

    The GENOME reference is indexed and mapped against twice: first with
    seq1 and seq2 supplied together as unpaired reads, then with the
    seq10/seq11 mates supplied as paired files.  The test contains no
    assertions; it only checks that indexing, mapping, BAM conversion and
    opening the BAM with pysam do not raise.
    """
    reference_seq = GENOME

    def flushed_tmp(content):
        # Write content to a named temp file and flush it so that external
        # programs can read it through its path.
        fhand = NamedTemporaryFile()
        fhand.write(content)
        fhand.flush()
        return fhand

    # --- unpaired reads -------------------------------------------------
    fwd_read = ('>seq1\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCCTG'
                'AGGACACCCAGTCTCCCGGGAGTCTTTTCCAAGGTGTGCTCCTGATCGCCGTGTTA\n')
    rev_read = ('>seq2\nTAACACGGCGATCAGGAGCACACCTTGGAAAAGACTCCCGGGAGACTGGGTG'
                'TCCTCAGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT\n')
    unpaired_fhand = flushed_tmp(fwd_read + rev_read)

    genome_fhand = flushed_tmp(reference_seq)
    genome_index = get_or_create_bowtie2_index(genome_fhand.name)

    unpaired_bam = NamedTemporaryFile(suffix='.bam')
    mapper = map_with_bowtie2(genome_index, extra_params=['-a', '-f'],
                              unpaired_fpaths=[unpaired_fhand.name])
    map_process_to_bam(mapper, unpaired_bam.name)
    samfile = pysam.Samfile(unpaired_bam.name)

    # --- paired reads ---------------------------------------------------
    # seq10: f is reversed, r is direct
    seq10_f = ('>seq10 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
               '\n')
    seq10_r = ('>seq10 r\nATGTAATACGGGCTAGCCGGGGATGCCGACGATTAAACACGCTGTCATA'
               'GTAGCGTCTTCCTAGGGTTTTCCCCATGGAATCGGTTATCGTGATACGTTAAATTT\n')
    # seq11: f is direct, r is reversed
    seq11_f = ('>seq11 f\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCC'
               '\n')
    seq11_r = ('>seq11 r\nAAATTTAACGTATCACGATAACCGATTCCATGGGGAAAACCCTAGGAAG'
               'ACGCTACTATGACAGCGTGTTTAATCGTCGGCATCCCCGGCTAGCCCGTATTACAT\n')

    f_fhand = flushed_tmp(seq10_f + seq11_f)
    r_fhand = flushed_tmp(seq10_r + seq11_r)
    mate_fpaths = [[f_fhand.name], [r_fhand.name]]

    # The reference is written and indexed again for the paired run.
    genome_fhand = flushed_tmp(reference_seq)
    genome_index = get_or_create_bowtie2_index(genome_fhand.name)

    paired_bam = NamedTemporaryFile(suffix='.bam')
    mapper = map_with_bowtie2(genome_index, extra_params=['-a', '-f'],
                              paired_fpaths=mate_fpaths)
    map_process_to_bam(mapper, paired_bam.name)
    samfile = pysam.Samfile(paired_bam.name)
def test_map_with_bowtie2(self):
    """Map the arabidopsis test reads with bowtie2 into a temporary BAM.

    A bowtie2 index for the 'arabidopsis_genes' test reference is obtained
    using a temporary directory, and 'arabidopsis_reads.fastq' is mapped
    as unpaired reads.  The test contains no assertions; it passes when
    none of the calls raises.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        map_with_bowtie2(index_fpath, bam_fhand.name,
                         unpaired_fpaths=[reads_fpath])
    finally:
        # Always release the temporary directory, even when indexing or
        # mapping fails, so a failing run does not leave it behind.
        directory.close()
def test_rev_compl_fragmented_reads(self):
    """Run the bowtie2 -> BAM pipeline on forward/reverse oriented reads.

    Reads are mapped against the 'ref_example.fasta' test data path twice:
    first seq1 and seq2 as one unpaired file, then the seq10/seq11 mates
    as a (forward, reverse) pair of files.  The test contains no
    assertions; it only checks that mapping, BAM conversion and opening
    the BAM with pysam do not raise.
    """
    # NOTE(review): this path is handed to map_with_bowtie2 directly as the
    # index path; presumably a matching index already exists -- confirm.
    index_fpath = os.path.join(TEST_DATA_DIR, 'ref_example.fasta')

    def flushed_tmp(content):
        # Write content to a named temp file and flush it so that external
        # programs can read it through its path.
        fhand = NamedTemporaryFile()
        fhand.write(content)
        fhand.flush()
        return fhand

    # --- unpaired reads -------------------------------------------------
    fwd_read = ('>seq1\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCCTG'
                'AGGACACCCAGTCTCCCGGGAGTCTTTTCCAAGGTGTGCTCCTGATCGCCGTGTTA\n')
    rev_read = ('>seq2\nTAACACGGCGATCAGGAGCACACCTTGGAAAAGACTCCCGGGAGACTGGGTG'
                'TCCTCAGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT\n')
    unpaired_fhand = flushed_tmp(fwd_read + rev_read)

    unpaired_bam = NamedTemporaryFile(suffix='.bam')
    mapper = map_with_bowtie2(index_fpath, extra_params=['-a', '-f'],
                              unpaired_fpath=unpaired_fhand.name)
    map_process_to_bam(mapper, unpaired_bam.name)
    samfile = pysam.Samfile(unpaired_bam.name)

    # --- paired reads ---------------------------------------------------
    # seq10: f is reversed, r is direct
    seq10_f = ('>seq10 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
               '\n')
    seq10_r = ('>seq10 r\nATGTAATACGGGCTAGCCGGGGATGCCGACGATTAAACACGCTGTCATA'
               'GTAGCGTCTTCCTAGGGTTTTCCCCATGGAATCGGTTATCGTGATACGTTAAATTT\n')
    # seq11: f is direct, r is reversed
    seq11_f = ('>seq11 f\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCC'
               '\n')
    seq11_r = ('>seq11 r\nAAATTTAACGTATCACGATAACCGATTCCATGGGGAAAACCCTAGGAAG'
               'ACGCTACTATGACAGCGTGTTTAATCGTCGGCATCCCCGGCTAGCCCGTATTACAT\n')

    f_fhand = flushed_tmp(seq10_f + seq11_f)
    r_fhand = flushed_tmp(seq10_r + seq11_r)
    mate_fpaths = (f_fhand.name, r_fhand.name)

    paired_bam = NamedTemporaryFile(suffix='.bam')
    mapper = map_with_bowtie2(index_fpath, extra_params=['-a', '-f'],
                              paired_fpaths=mate_fpaths)
    map_process_to_bam(mapper, paired_bam.name)
    samfile = pysam.Samfile(paired_bam.name)
def _setup_checks(self, filterpacket):
    """Map the packet's passed sequences with bowtie2 and keep the result.

    The sequence groups under filterpacket[SEQS_PASSED] are flattened and
    written to a temporary reads file.  The file format (and the matching
    bowtie2 flags) is decided from the first sequence only.  The reads
    are mapped against self._index_fpath into a temporary BAM, and
    whatever _get_mapped_reads returns for that BAM and self.min_mapq is
    stored in self.mapped_reads.

    Raises RuntimeError when a SeqItem's format is neither fasta nor
    fastq, and NotImplementedError for any other sequence kind.
    """
    index_fpath = self._index_fpath
    get_or_create_bowtie2_index(index_fpath)

    # Flatten the groups of passed sequences into one list.
    seqs = []
    for seq_group in filterpacket[SEQS_PASSED]:
        seqs.extend(seq_group)

    first_seq = seqs[0]
    seq_class = first_seq.kind
    bowtie_flags = []

    # Decide whether bowtie2 should be fed a fasta or a fastq reads file.
    if seq_class == SEQRECORD:
        annotations = first_seq.object.letter_annotations
        if 'phred_quality' in annotations.viewkeys():
            file_format = 'fastq'
        else:
            file_format = 'fasta'
            bowtie_flags.append('-f')
    elif seq_class == SEQITEM:
        file_format = get_file_format(first_seq)
        if 'illumina' in file_format:
            bowtie_flags.append('--phred64')
        elif 'fasta' in file_format:
            bowtie_flags.append('-f')
        elif 'fastq' not in file_format:
            raise RuntimeError('For FilterBowtie2Match and SeqItems fastq '
                               'or fasta files are required')
    else:
        raise NotImplementedError()

    reads_fhand = NamedTemporaryFile(suffix=file_format)
    write_seqs(seqs, reads_fhand, file_format=file_format)
    # Flush so bowtie2 sees the complete file when reading it by path.
    reads_fhand.flush()

    bam_fhand = NamedTemporaryFile(suffix='.bam')
    map_with_bowtie2(index_fpath, bam_fhand.name,
                     unpaired_fpaths=[reads_fhand.name],
                     extra_params=bowtie_flags)

    self.mapped_reads = _get_mapped_reads(bam_fhand.name, self.min_mapq)
def _setup_checks(self, filterpacket):
    """Map the packet's passed sequences with bowtie2 and keep the result.

    The sequence groups under filterpacket[SEQS_PASSED] are flattened and
    written to a temporary reads file.  The file format (and the matching
    bowtie2 flags) is decided from the first sequence only.  The reads
    are mapped against self._index_fpath into a temporary BAM, and
    whatever _get_mapped_reads returns for that BAM and self.min_mapq is
    stored in self.mapped_reads.

    Raises RuntimeError when a SeqItem's format is neither fasta nor
    fastq, and NotImplementedError for any other sequence kind.
    """
    index_fpath = self._index_fpath
    get_or_create_bowtie2_index(index_fpath)

    # Flatten the groups of passed sequences into one list.
    seqs = []
    for seq_group in filterpacket[SEQS_PASSED]:
        seqs.extend(seq_group)

    first_seq = seqs[0]
    seq_class = first_seq.kind
    bowtie_flags = []

    # Decide whether bowtie2 should be fed a fasta or a fastq reads file.
    if seq_class == SEQRECORD:
        annotations = first_seq.object.letter_annotations
        if 'phred_quality' in annotations.viewkeys():
            file_format = 'fastq'
        else:
            file_format = 'fasta'
            bowtie_flags.append('-f')
    elif seq_class == SEQITEM:
        file_format = get_file_format(first_seq)
        if 'illumina' in file_format:
            bowtie_flags.append('--phred64')
        elif 'fasta' in file_format:
            bowtie_flags.append('-f')
        elif 'fastq' not in file_format:
            raise RuntimeError('For FilterBowtie2Match and SeqItems fastq '
                               'or fasta files are required')
    else:
        raise NotImplementedError()

    reads_fhand = NamedTemporaryFile(suffix=file_format)
    write_seqs(seqs, reads_fhand, file_format=file_format)
    # Flush so bowtie2 sees the complete file when reading it by path.
    reads_fhand.flush()

    bam_fhand = NamedTemporaryFile(suffix='.bam')
    map_with_bowtie2(index_fpath, bam_fhand.name,
                     unpaired_fpaths=[reads_fhand.name],
                     extra_params=bowtie_flags)

    self.mapped_reads = _get_mapped_reads(bam_fhand.name, self.min_mapq)
def test_map_with_bowtie2(self):
    """Map the arabidopsis test reads with bowtie2, unpaired and paired.

    For each run a bowtie2 index for the 'arabidopsis_genes' test
    reference is obtained using a fresh temporary directory and the
    mapper output is converted to a temporary BAM.  The test contains no
    assertions; it passes when none of the calls raises.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')

    # With the unpaired_fpath option
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2 = map_with_bowtie2(index_fpath, unpaired_fpath=reads_fpath)
        map_process_to_bam(bowtie2, bam_fhand.name)
    finally:
        # Always release the temporary directory, even on failure.
        directory.close()

    # With the paired_fpaths option
    forward_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
    # Keep the handle referenced: NamedTemporaryFile deletes its file as
    # soon as the object is closed or collected, so taking only `.name`
    # from a throw-away object leaves a path to a file that is gone.
    # NOTE(review): the reverse file is empty while the forward file holds
    # reads -- confirm this pairing is what the test intends to exercise.
    reverse_fhand = NamedTemporaryFile()
    reverse_fpath = reverse_fhand.name
    paired_fpaths = (forward_fpath, reverse_fpath)
    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2 = map_with_bowtie2(index_fpath, paired_fpaths=paired_fpaths)
        map_process_to_bam(bowtie2, bam_fhand.name)
    finally:
        directory.close()
def test_map_with_bowtie2(self):
    """Map the arabidopsis test reads with bowtie2, unpaired and paired.

    For each run a bowtie2 index for the 'arabidopsis_genes' test
    reference is obtained using a fresh temporary directory and the
    mapper output is converted to a temporary BAM.  The unpaired run uses
    'arabidopsis_reads.fastq'; the paired run uses 'arabreads_1.fastq'
    and 'arabreads_2.fastq'.  The test contains no assertions; it passes
    when none of the calls raises.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')

    # With the unpaired_fpath option
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2 = map_with_bowtie2(index_fpath, unpaired_fpath=reads_fpath)
        map_process_to_bam(bowtie2, bam_fhand.name)
    finally:
        # Always release the temporary directory, even on failure.
        directory.close()

    # With the paired_fpaths option
    forward_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_1.fastq')
    reverse_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_2.fastq')
    paired_fpaths = (forward_fpath, reverse_fpath)
    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2 = map_with_bowtie2(index_fpath, paired_fpaths=paired_fpaths)
        map_process_to_bam(bowtie2, bam_fhand.name)
    finally:
        directory.close()