def test_tophat_paired(self):
    """Map paired reads with TopHat, without and with mate-distance options.

    The first call omits ``mate_inner_dist``/``mate_std_dev`` and is expected
    to raise RuntimeError. The second call supplies both and must leave an
    ``accepted_hits.bam`` file in the output directory.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_1_fpath = os.path.join(TEST_DATA_DIR, 'reads_1.fastq')
    reads_2_fpath = os.path.join(TEST_DATA_DIR, 'reads_2.fastq')

    # The directory is created outside the try block so that ``finally``
    # never references an unbound name if TemporaryDir() itself fails.
    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        map_with_tophat(index_fpath, directory.name,
                        paired_fpaths=[reads_1_fpath, reads_2_fpath])
        # self.fail raises AssertionError, so it is not swallowed below.
        self.fail('runtimeError expected')
    except RuntimeError:
        pass
    finally:
        directory.close()

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        map_with_tophat(index_fpath, directory.name,
                        paired_fpaths=[reads_1_fpath, reads_2_fpath],
                        mate_inner_dist=350, mate_std_dev=50)
        # The result of os.path.exists used to be discarded; check it.
        bam_fpath = os.path.join(directory.name, 'accepted_hits.bam')
        self.assertTrue(os.path.exists(bam_fpath))
    finally:
        directory.close()
def test_tophat_paired(self):
    """Map paired reads with TopHat, without and with mate-distance options.

    The first call omits ``mate_inner_dist``/``mate_std_dev`` and is expected
    to raise RuntimeError. The second call supplies both and must leave an
    ``accepted_hits.bam`` file in the output directory.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_1_fpath = os.path.join(TEST_DATA_DIR, 'reads_1.fastq')
    reads_2_fpath = os.path.join(TEST_DATA_DIR, 'reads_2.fastq')

    # The directory is created outside the try block so that ``finally``
    # never references an unbound name if TemporaryDir() itself fails.
    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        map_with_tophat(index_fpath, directory.name,
                        paired_fpaths=[reads_1_fpath, reads_2_fpath])
        # self.fail raises AssertionError, so it is not swallowed below.
        self.fail('runtimeError expected')
    except RuntimeError:
        pass
    finally:
        directory.close()

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        map_with_tophat(index_fpath, directory.name,
                        paired_fpaths=[reads_1_fpath, reads_2_fpath],
                        mate_inner_dist=350, mate_std_dev=50)
        # The result of os.path.exists used to be discarded; check it.
        bam_fpath = os.path.join(directory.name, 'accepted_hits.bam')
        self.assertTrue(os.path.exists(bam_fpath))
    finally:
        directory.close()
def test_rev_compl_fragmented_reads(self):
    """Run bowtie2 on unpaired and then paired FASTA reads against GENOME.

    Each stage writes the reads and the reference to temporary files,
    builds a bowtie2 index, maps with ``-a -f``, converts the mapping to a
    BAM file and opens it with pysam. No assertions are made on the
    alignments; the test only checks that the pipeline runs.
    """
    def _flushed_tmp_file(content):
        # Temporary file holding *content*, flushed so other tools can read it.
        fhand = NamedTemporaryFile()
        fhand.write(content)
        fhand.flush()
        return fhand

    bowtie2_params = ['-a', '-f']

    # Stage 1: unpaired reads (seq1 and seq2 in a single file).
    unpaired_query = (
        '>seq1\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCCTG'
        'AGGACACCCAGTCTCCCGGGAGTCTTTTCCAAGGTGTGCTCCTGATCGCCGTGTTA\n'
        '>seq2\nTAACACGGCGATCAGGAGCACACCTTGGAAAAGACTCCCGGGAGACTGGGTG'
        'TCCTCAGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT\n'
    )
    in_fhand = _flushed_tmp_file(unpaired_query)
    ref_fhand = _flushed_tmp_file(GENOME)
    index_fpath = get_or_create_bowtie2_index(ref_fhand.name)
    bam_fhand = NamedTemporaryFile(suffix='.bam')
    bowtie2 = map_with_bowtie2(index_fpath, extra_params=bowtie2_params,
                               unpaired_fpaths=[in_fhand.name])
    map_process_to_bam(bowtie2, bam_fhand.name)
    samfile = pysam.Samfile(bam_fhand.name)

    # Stage 2: paired reads.
    # seq10: f is reversed, r is direct.
    # seq11: f is direct, r is reversed.
    forward_query = (
        '>seq10 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
        '\n'
        '>seq11 f\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCC'
        '\n'
    )
    reverse_query = (
        '>seq10 r\nATGTAATACGGGCTAGCCGGGGATGCCGACGATTAAACACGCTGTCATA'
        'GTAGCGTCTTCCTAGGGTTTTCCCCATGGAATCGGTTATCGTGATACGTTAAATTT\n'
        '>seq11 r\nAAATTTAACGTATCACGATAACCGATTCCATGGGGAAAACCCTAGGAAG'
        'ACGCTACTATGACAGCGTGTTTAATCGTCGGCATCCCCGGCTAGCCCGTATTACAT\n'
    )
    f_fhand = _flushed_tmp_file(forward_query)
    r_fhand = _flushed_tmp_file(reverse_query)
    paired_fpaths = [[f_fhand.name], [r_fhand.name]]

    # A fresh reference file and index are built for the paired run.
    ref_fhand = _flushed_tmp_file(GENOME)
    index_fpath = get_or_create_bowtie2_index(ref_fhand.name)
    bam_fhand = NamedTemporaryFile(suffix='.bam')
    bowtie2 = map_with_bowtie2(index_fpath, extra_params=bowtie2_params,
                               paired_fpaths=paired_fpaths)
    map_process_to_bam(bowtie2, bam_fhand.name)
    samfile = pysam.Samfile(bam_fhand.name)
def test_get_or_create_index(self):
    """Check that a bowtie2 index is created in, and reused from, a directory.

    The index is requested twice for the same reference and directory; both
    calls must return ``<directory>/arabidopsis_genes`` and the index must
    exist at that path afterwards.
    """
    db_name = 'arabidopsis_genes'
    seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
    # NOTE(review): this requires an index to already exist next to the
    # reference before one is built in the temp dir; confirm it is intended.
    assert _bowtie2_index_exists(seq_fpath)

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even when an
    # assertion fails.
    try:
        index_fpath = get_or_create_bowtie2_index(seq_fpath, directory.name)
        expected_index = os.path.join(directory.name,
                                      os.path.basename(db_name))
        assert index_fpath == expected_index
        assert _bowtie2_index_exists(index_fpath)

        # Second request: the index already exists.
        index_fpath = get_or_create_bowtie2_index(seq_fpath, directory.name)
        assert index_fpath == expected_index
        assert _bowtie2_index_exists(index_fpath)
    finally:
        directory.close()
def test_get_or_create_index(self):
    """Check that a bowtie2 index is created in, and reused from, a directory.

    No index may exist next to the reference beforehand. The index is then
    requested twice for the same reference and directory; both calls must
    return ``<directory>/arabidopsis_genes`` and the index must exist there.
    """
    db_name = 'arabidopsis_genes'
    seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
    assert not _bowtie2_index_exists(seq_fpath)

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even when an
    # assertion fails.
    try:
        index_fpath = get_or_create_bowtie2_index(seq_fpath, directory.name)
        expected_index = os.path.join(directory.name,
                                      os.path.basename(db_name))
        assert index_fpath == expected_index
        assert _bowtie2_index_exists(index_fpath)

        # Second request: the index already exists.
        index_fpath = get_or_create_bowtie2_index(seq_fpath, directory.name)
        assert index_fpath == expected_index
        assert _bowtie2_index_exists(index_fpath)
    finally:
        directory.close()
def test_tophat(self):
    """Map unpaired reads with TopHat and check that the BAM output exists."""
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even on failure.
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        map_with_tophat(index_fpath, directory.name,
                        unpaired_fpath=reads_fpath)
        # The result of os.path.exists used to be discarded; assert it.
        bam_fpath = os.path.join(directory.name, 'accepted_hits.bam')
        assert os.path.exists(bam_fpath)
    finally:
        directory.close()
def test_tophat(self):
    """Map unpaired reads with TopHat and check that the BAM output exists."""
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even on failure.
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        map_with_tophat(index_fpath, directory.name,
                        unpaired_fpath=reads_fpath)
        # The result of os.path.exists used to be discarded; assert it.
        bam_fpath = os.path.join(directory.name, 'accepted_hits.bam')
        assert os.path.exists(bam_fpath)
    finally:
        directory.close()
def test_map_with_bowtie2(self):
    """Map unpaired reads with bowtie2 into a temporary BAM file.

    Only checks that indexing and mapping run without raising.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even when the
    # indexing or mapping step raises.
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        map_with_bowtie2(index_fpath, bam_fhand.name,
                         unpaired_fpaths=[reads_fpath])
    finally:
        directory.close()
def test_map_with_bowtie2(self):
    """Map unpaired reads with bowtie2 into a temporary BAM file.

    Only checks that indexing and mapping run without raising.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even when the
    # indexing or mapping step raises.
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        map_with_bowtie2(index_fpath, bam_fhand.name,
                         unpaired_fpaths=[reads_fpath])
    finally:
        directory.close()
def _setup_checks(self, filterpacket):
    """Map the passed sequences of *filterpacket* with bowtie2.

    The sequences under ``SEQS_PASSED`` are flattened, written to a
    temporary reads file, mapped against ``self._index_fpath`` and converted
    to a temporary BAM file. The result of ``_get_mapped_reads`` on that BAM
    (called with ``self.min_mapq``) is stored in ``self.mapped_reads``.

    The first sequence alone decides the reads file format and the extra
    bowtie2 flags (``-f`` for fasta, ``--phred64`` for illumina formats).

    Raises:
        RuntimeError: for a SeqItem whose file format contains none of
            'illumina', 'fasta' or 'fastq'.
        NotImplementedError: for any other kind of sequence.
    """
    index_fpath = self._index_fpath
    get_or_create_bowtie2_index(index_fpath)

    # Flatten the groups of passed sequences into a single list.
    seqs = []
    for seq_group in filterpacket[SEQS_PASSED]:
        seqs.extend(seq_group)

    # Which format do we need for the bowtie2 input read file, fasta or
    # fastq? The first sequence is taken as representative.
    first_seq = seqs[0]
    seq_class = first_seq.kind
    if seq_class == SEQRECORD:
        annotations = first_seq.object.letter_annotations
        if 'phred_quality' in annotations.viewkeys():
            file_format, extra_params = 'fastq', []
        else:
            file_format, extra_params = 'fasta', ['-f']
    elif seq_class == SEQITEM:
        file_format = get_file_format(first_seq)
        if 'illumina' in file_format:
            extra_params = ['--phred64']
        elif 'fasta' in file_format:
            extra_params = ['-f']
        elif 'fastq' in file_format:
            extra_params = []
        else:
            raise RuntimeError('For FilterBowtie2Match and SeqItems fastq '
                               'or fasta files are required')
    else:
        raise NotImplementedError()

    reads_fhand = NamedTemporaryFile(suffix=file_format)
    write_seqs(seqs, reads_fhand, file_format=file_format)
    reads_fhand.flush()

    bam_fhand = NamedTemporaryFile(suffix='.bam')
    map_process = map_with_bowtie2(index_fpath,
                                   unpaired_fpaths=[reads_fhand.name],
                                   extra_params=extra_params)
    map_process_to_bam(map_process, bam_fhand.name)

    self.mapped_reads = _get_mapped_reads(bam_fhand.name, self.min_mapq)
def _setup_checks(self, filterpacket):
    """Map the passed sequences of *filterpacket* with bowtie2.

    The sequences under ``SEQS_PASSED`` are flattened, written to a
    temporary reads file and mapped against ``self._index_fpath``, with a
    temporary BAM path handed to ``map_with_bowtie2``. The result of
    ``_get_mapped_reads`` on that BAM (called with ``self.min_mapq``) is
    stored in ``self.mapped_reads``.

    The first sequence alone decides the reads file format and the extra
    bowtie2 flags (``-f`` for fasta, ``--phred64`` for illumina formats).

    Raises:
        RuntimeError: for a SeqItem whose file format contains none of
            'illumina', 'fasta' or 'fastq'.
        NotImplementedError: for any other kind of sequence.
    """
    index_fpath = self._index_fpath
    get_or_create_bowtie2_index(index_fpath)

    # Flatten the groups of passed sequences into a single list.
    seqs = []
    for seq_group in filterpacket[SEQS_PASSED]:
        seqs.extend(seq_group)

    # Which format do we need for the bowtie2 input read file, fasta or
    # fastq? The first sequence is taken as representative.
    first_seq = seqs[0]
    seq_class = first_seq.kind
    if seq_class == SEQRECORD:
        annotations = first_seq.object.letter_annotations
        if 'phred_quality' in annotations.viewkeys():
            file_format, extra_params = 'fastq', []
        else:
            file_format, extra_params = 'fasta', ['-f']
    elif seq_class == SEQITEM:
        file_format = get_file_format(first_seq)
        if 'illumina' in file_format:
            extra_params = ['--phred64']
        elif 'fasta' in file_format:
            extra_params = ['-f']
        elif 'fastq' in file_format:
            extra_params = []
        else:
            raise RuntimeError('For FilterBowtie2Match and SeqItems fastq '
                               'or fasta files are required')
    else:
        raise NotImplementedError()

    reads_fhand = NamedTemporaryFile(suffix=file_format)
    write_seqs(seqs, reads_fhand, file_format=file_format)
    reads_fhand.flush()

    bam_fhand = NamedTemporaryFile(suffix='.bam')
    map_with_bowtie2(index_fpath, bam_fhand.name,
                     unpaired_fpaths=[reads_fhand.name],
                     extra_params=extra_params)

    self.mapped_reads = _get_mapped_reads(bam_fhand.name, self.min_mapq)
def test_map_with_bowtie2(self):
    """Map reads with bowtie2, first unpaired and then with paired_fpaths.

    Each stage builds the index in its own temporary directory, starts the
    mapping and converts its output to a temporary BAM file. Only checks
    that the steps run without raising.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even on failure.
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2 = map_with_bowtie2(index_fpath, unpaired_fpath=reads_fpath)
        map_process_to_bam(bowtie2, bam_fhand.name)
    finally:
        directory.close()

    # With paired_fpaths option
    forward_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
    # NOTE(review): the temporary file object is not kept, so on CPython it
    # is probably deleted immediately and this path points to a file that
    # no longer exists. Confirm whether a real reverse reads file was meant.
    reverse_fpath = NamedTemporaryFile().name
    paired_fpaths = (forward_fpath, reverse_fpath)

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2 = map_with_bowtie2(index_fpath, paired_fpaths=paired_fpaths)
        map_process_to_bam(bowtie2, bam_fhand.name)
    finally:
        directory.close()
def test_map_with_bowtie2(self):
    """Map reads with bowtie2, first unpaired and then with paired_fpaths.

    Each stage builds the index in its own temporary directory, starts the
    mapping and converts its output to a temporary BAM file. Only checks
    that the steps run without raising.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even on failure.
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2 = map_with_bowtie2(index_fpath, unpaired_fpath=reads_fpath)
        map_process_to_bam(bowtie2, bam_fhand.name)
    finally:
        directory.close()

    # With paired_fpaths option
    forward_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_1.fastq')
    reverse_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_2.fastq')
    paired_fpaths = (forward_fpath, reverse_fpath)

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                  directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        bowtie2 = map_with_bowtie2(index_fpath, paired_fpaths=paired_fpaths)
        map_process_to_bam(bowtie2, bam_fhand.name)
    finally:
        directory.close()
def test_filter_by_bowtie2_bin():
    """Run the ``filter_by_bowtie2`` script on fastq and fasta reads.

    ``-h`` must print a text containing 'usage'. For each reads file, the
    file given with ``-o`` must contain 'no_arabi' and the file given with
    ``-e`` must contain 'read1'.
    """
    filter_bin = os.path.join(BIN_DIR, 'filter_by_bowtie2')
    assert 'usage' in check_output([filter_bin, '-h'])

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even when the
    # script or an assertion fails.
    try:
        index_fpath = get_or_create_bowtie2_index(
            os.path.join(TEST_DATA_DIR, 'arabidopsis_genes'),
            directory=directory.name)

        fastq_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        fasta_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fasta')
        for reads_fpath in [fastq_fpath, fasta_fpath]:
            out_fhand = NamedTemporaryFile(suffix='.seqs')
            filtered_fhand = NamedTemporaryFile(suffix='.seqs')
            cmd = [filter_bin, '-i', index_fpath, '-o', out_fhand.name,
                   '-e', filtered_fhand.name, reads_fpath]
            check_output(cmd)

            # Read the results through context managers so the extra
            # handles are closed deterministically.
            with open(out_fhand.name) as passed_fhand:
                assert 'no_arabi' in passed_fhand.read()
            with open(filtered_fhand.name) as rejected_fhand:
                assert 'read1' in rejected_fhand.read()
    finally:
        directory.close()
def test_filter_by_bowtie2_bin():
    """Run the ``filter_by_bowtie2`` script on fastq and fasta reads.

    ``-h`` must print a text containing 'usage'. For each reads file, the
    file given with ``-o`` must contain 'no_arabi' and the file given with
    ``-e`` must contain 'read1'.
    """
    filter_bin = os.path.join(BIN_DIR, 'filter_by_bowtie2')
    assert 'usage' in check_output([filter_bin, '-h'])

    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even when the
    # script or an assertion fails.
    try:
        index_fpath = get_or_create_bowtie2_index(
            os.path.join(TEST_DATA_DIR, 'arabidopsis_genes'),
            directory=directory.name)

        fastq_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        fasta_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fasta')
        for reads_fpath in [fastq_fpath, fasta_fpath]:
            out_fhand = NamedTemporaryFile(suffix='.seqs')
            filtered_fhand = NamedTemporaryFile(suffix='.seqs')
            cmd = [filter_bin, '-i', index_fpath, '-o', out_fhand.name,
                   '-e', filtered_fhand.name, reads_fpath]
            check_output(cmd)

            # Read the results through context managers so the extra
            # handles are closed deterministically.
            with open(out_fhand.name) as passed_fhand:
                assert 'no_arabi' in passed_fhand.read()
            with open(filtered_fhand.name) as rejected_fhand:
                assert 'read1' in rejected_fhand.read()
    finally:
        directory.close()
def test_filter_by_bowtie2():
    """Filter fastq and fasta reads with FilterBowtie2Match.

    For both preferred sequence classes (SEQITEM, SEQRECORD) and both read
    files, the first filter packet must end with 'no_arabi' as the only
    passed sequence and 'read1', 'read2', 'read3' as the filtered-out ones.
    """
    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even when an
    # assertion fails.
    try:
        index_fpath = get_or_create_bowtie2_index(
            os.path.join(TEST_DATA_DIR, 'arabidopsis_genes'),
            directory=directory.name)

        fastq_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        fasta_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fasta')
        passed = ['no_arabi']
        filtered_out = ['read1', 'read2', 'read3']

        for preffered_classes in [[SEQITEM], [SEQRECORD]]:
            for reads_fpath in [fastq_fpath, fasta_fpath]:
                # The reads file stays open for the whole iteration, since
                # the packets may be read lazily, and is closed afterwards.
                with open(reads_fpath) as reads_fhand:
                    seq_packets = read_seq_packets(
                        [reads_fhand],
                        prefered_seq_classes=preffered_classes)
                    filter_packets = seq_to_filterpackets(seq_packets)
                    filter_ = FilterBowtie2Match(index_fpath)
                    filter_packet = list(filter_packets)[0]
                    filter_packets = filter_(filter_packet)
                    assert (_seqs_to_names(filter_packets[SEQS_PASSED]) ==
                            passed)
                    assert (_seqs_to_names(
                        filter_packets[SEQS_FILTERED_OUT]) == filtered_out)
    finally:
        directory.close()
def test_filter_by_bowtie2():
    """Filter fastq and fasta reads with FilterBowtie2Match.

    For both preferred sequence classes (SEQITEM, SEQRECORD) and both read
    files, the first filter packet must end with 'no_arabi' as the only
    passed sequence and 'read1', 'read2', 'read3' as the filtered-out ones.
    """
    directory = TemporaryDir()
    # try/finally so the temporary directory is removed even when an
    # assertion fails.
    try:
        index_fpath = get_or_create_bowtie2_index(
            os.path.join(TEST_DATA_DIR, 'arabidopsis_genes'),
            directory=directory.name)

        fastq_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        fasta_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fasta')
        passed = ['no_arabi']
        filtered_out = ['read1', 'read2', 'read3']

        for preffered_classes in [[SEQITEM], [SEQRECORD]]:
            for reads_fpath in [fastq_fpath, fasta_fpath]:
                # The reads file stays open for the whole iteration, since
                # the packets may be read lazily, and is closed afterwards.
                with open(reads_fpath) as reads_fhand:
                    seq_packets = read_seq_packets(
                        [reads_fhand],
                        prefered_seq_classes=preffered_classes)
                    filter_packets = seq_to_filterpackets(seq_packets)
                    filter_ = FilterBowtie2Match(index_fpath)
                    filter_packet = list(filter_packets)[0]
                    filter_packets = filter_(filter_packet)
                    assert (_seqs_to_names(filter_packets[SEQS_PASSED]) ==
                            passed)
                    assert (_seqs_to_names(
                        filter_packets[SEQS_FILTERED_OUT]) == filtered_out)
    finally:
        directory.close()