def test_add_rg_to_bam(self):
    """Map reads with bwa mem using a read group and inspect the BAM.

    The reads are mapped against the test reference, converted to BAM
    and dumped (header included) with ``samtools view -h``.  The test
    passes when the output contains the read group header for ID ``aa``
    and one known read sequence.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
    lib_name = 'aa'
    readgroup = {
        'ID': lib_name,
        'PL': 'illumina',
        'LB': lib_name,
        'SM': '{0}_illumina_pe'.format(lib_name),
        'PU': '0',
    }

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bwa_index(reference_fpath,
                                              directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        log_fhand = NamedTemporaryFile()

        bwa = map_with_bwamem(index_fpath, unpaired_fpath=reads_fpath,
                              readgroup=readgroup,
                              log_fpath=log_fhand.name)
        map_process_to_bam(bwa, bam_fhand.name)

        # -h makes samtools print the header, where the @RG line lives.
        cmd = [get_binary_path('samtools'), 'view', '-h', bam_fhand.name]
        out = subprocess.check_output(cmd, stderr=log_fhand)

        assert '@RG\tID:aa' in out
        assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
    finally:
        # Close the temporary directory even when the mapping or one of
        # the assertions fails, so a failing run does not leave it behind.
        directory.close()
def test_get_or_create_index(self):
    """Check that get_or_create_bwa_index builds and then reuses an index.

    The first call must return a path inside the temporary directory
    named after the reference file; the second call, made once the
    index is already there, must return the same path.
    """
    db_name = 'arabidopsis_genes'
    seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
    # The test data directory itself must not already hold an index.
    assert not _bwa_index_exists(seq_fpath)

    directory = TemporaryDir()
    try:
        expected_index = os.path.join(directory.name,
                                      os.path.basename(db_name))

        index_fpath = get_or_create_bwa_index(seq_fpath, directory.name)
        assert index_fpath == expected_index
        assert _bwa_index_exists(index_fpath)

        # already exists
        index_fpath = get_or_create_bwa_index(seq_fpath, directory.name)
        assert index_fpath == expected_index
        assert _bwa_index_exists(index_fpath)
    finally:
        # Close the temporary directory even if an assertion above fails.
        directory.close()
def test_map_with_bwa(self):
    """Map the test reads with map_with_bwasw and check the BAM content.

    The resulting BAM is dumped with ``samtools view`` and the test
    passes when one known read sequence appears in the output.
    """
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bwa_index(reference_fpath,
                                              directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        map_with_bwasw(index_fpath, bam_fhand.name,
                       unpaired_fpath=reads_fpath)

        cmd = [get_binary_path('samtools'), 'view', bam_fhand.name]
        out = subprocess.check_output(cmd)
        assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
    finally:
        # Close the temporary directory even when mapping or the
        # assertion fails.
        directory.close()
def test_map_with_bwa(self):
    """Run map_with_bwasw on unpaired reads and look for a known read.

    An index for the test reference is created in a temporary
    directory, the reads are mapped into a temporary BAM file and the
    ``samtools view`` dump of that file must contain the expected
    sequence.
    """
    samtools = get_binary_path('samtools')
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
    expected_seq = 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG'

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bwa_index(reference_fpath,
                                              directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        map_with_bwasw(index_fpath, bam_fhand.name,
                       unpaired_fpath=reads_fpath)
        out = subprocess.check_output([samtools, 'view', bam_fhand.name])
        assert expected_seq in out
    finally:
        # The original only closed the directory on success; a failing
        # run now closes it as well.
        directory.close()
def test_add_rg_to_bam(self):
    """Check that a read group given to map_with_bwamem reaches the BAM.

    After mapping the test reads and converting the result to BAM, the
    ``samtools view -h`` output must contain the read group header line
    for ID ``aa`` and one known read sequence.
    """
    lib_name = 'aa'
    readgroup = {'ID': lib_name, 'PL': 'illumina', 'LB': lib_name,
                 'SM': '{0}_illumina_pe'.format(lib_name), 'PU': '0'}
    reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
    reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')

    directory = TemporaryDir()
    try:
        index_fpath = get_or_create_bwa_index(reference_fpath,
                                              directory.name)
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        # The same temporary file receives the bwa log (by path) and
        # the samtools stderr (by handle).
        log_fhand = NamedTemporaryFile()

        bwa = map_with_bwamem(index_fpath, unpaired_fpath=reads_fpath,
                              readgroup=readgroup,
                              log_fpath=log_fhand.name)
        map_process_to_bam(bwa, bam_fhand.name)

        out = subprocess.check_output(
            [get_binary_path('samtools'), 'view', '-h', bam_fhand.name],
            stderr=log_fhand)
        assert '@RG\tID:aa' in out
        assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
    finally:
        # Always close the temporary directory, including on failure.
        directory.close()
def _sorted_mapped_reads(ref_fpath, paired_fpaths=None, unpaired_fpaths=None,
                         directory=None, file_format=None,
                         min_seed_len=None):
    """Map reads with bwa mem and return them sorted by query name.

    Parameters:
        ref_fpath: path of the reference used to get or create the bwa
            index.
        paired_fpaths: sequence of paired read file paths, or None.
        unpaired_fpaths: unpaired read file path(s), used when
            paired_fpaths is empty or None.  At least one of the two
            must be non-empty, because the first path is opened.
        directory: passed to get_or_create_bwa_index as the index
            location.
        file_format: when given it is set on the first reads file;
            otherwise the format is read from that file.
        min_seed_len: when given, passed to bwa as ``-k``.

    Returns:
        A pysam.Samfile opened on a temporary file holding the mapped
        reads sorted with key 'queryname'.
    """
    first_fpath = paired_fpaths[0] if paired_fpaths else unpaired_fpaths[0]
    # The handle is only needed to set or detect the format, so close it
    # right away instead of leaking it.
    with open(first_fpath) as fhand:
        if file_format is not None:
            set_format(fhand, file_format)
        else:
            file_format = get_format(fhand)

    index_fpath = get_or_create_bwa_index(ref_fpath, directory)

    extra_params = ['-a', '-M']
    if min_seed_len is not None:
        # Command-line arguments must be strings; callers may pass an int.
        extra_params.extend(['-k', str(min_seed_len)])

    bwa = map_with_bwamem(index_fpath, paired_fpaths=paired_fpaths,
                          unpaired_fpath=unpaired_fpaths,
                          extra_params=extra_params)

    # Use the default temporary directory (it honours TMPDIR) rather than
    # a path that only exists on one developer's machine.
    bam_fhand = NamedTemporaryFile()
    sort_mapped_reads(bwa, bam_fhand.name, key='queryname')
    bamfile = pysam.Samfile(bam_fhand.name)
    return bamfile
def test_rev_compl_fragmented_reads(self): reference_seq = GENOME #with paired_reads. #f is reversed r is direct query1 = '>seq10 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT' query1 += '\n' query2 = '>seq10 r\nATGTAATACGGGCTAGCCGGGGATGCCGACGATTAAACACGCTGTCATA' query2 += 'GTAGCGTCTTCCTAGGGTTTTCCCCATGGAATCGGTTATCGTGATACGTTAAATTT\n' #f is direct, r is reversed query3 = '>seq11 f\nAAGTTCAATGTAGCAAGGGTACATGCTGACGAGATGGGTCTGCGATCCC' query3 += '\n' query4 = '>seq11 r\nAAATTTAACGTATCACGATAACCGATTCCATGGGGAAAACCCTAGGAAG' query4 += 'ACGCTACTATGACAGCGTGTTTAATCGTCGGCATCCCCGGCTAGCCCGTATTACAT\n' #f is fragmented in two reference sequences. r mapps completely query7 = '>seq4 f\nCAAATCATCACCAGACCATGTCCGATCCCGGGAGTCTTTTCCAAGGTGTGC' query7 += 'TCTTTATCCGGCCCTTGCTCAAGGGTATGTTAAAACGGCAAGAGCTGCCTGAGCGCG\n' query8 = '>seq4 r\nTGTTCTGCAATCGATACAACGATCGAATTTAATCTGAGTAACTGCCAATTC' query8 += 'TGAGTAATATTATAGAAAGT\n' query_f = query1 + query3 + query7 query_r = query2 + query4 + query8 f_fhand = NamedTemporaryFile() f_fhand.write(query_f) f_fhand.flush() r_fhand = NamedTemporaryFile() r_fhand.write(query_r) r_fhand.flush() paired_fpaths = [f_fhand.name, r_fhand.name] ref_fhand = NamedTemporaryFile() ref_fhand.write(reference_seq) ref_fhand.flush() index_fpath = get_or_create_bwa_index(ref_fhand.name) bam_fhand = NamedTemporaryFile(suffix='.bam') bwa = map_with_bwamem(index_fpath, paired_fpaths=paired_fpaths) map_process_to_bam(bwa, bam_fhand.name) samfile = pysam.Samfile(bam_fhand.name)