def setUp(self):
    # Fixture: two toy read pairs plus the sequence arrays built from them.
    # Row layout: name, read 1, quality 1, read 2, quality 2.
    # NOTE(review): another setUp with the same data is visible in this
    # source; if both sit in one class, only the later definition runs.
    first_pair = ["name1", "CAAAT", "latte", "CAGAT", "acqua"]
    second_pair = ["name2", "TTAGG", "beefy", "CTTGG", "caffe"]
    self.toy_data = [first_pair, second_pair]
    self.toy_bwsa = bwa.build_bws_array(self.toy_data, qtype="fastq-illumina")
def test_build_bws_array_with_trim_qual_zero(self):
    # Build sequence arrays with trim_qual=0 and check that both reads of the
    # pair report a clip_len of 100.
    pair = (
        # (name, read 1, quality 1, read 2, quality 2)
        (
            "pair1",
            "ATNCACCTGCCTTGGCCTCGCAAAGTGCTGGGATTACAGGCATGANNNNNNNNNNNNNNNNNNCNGTNNNNNNNNNNNNNTNCANATNNNNNNNNNNNTG",
            "==#@7::;;?FF?FFGGGFFGGGGE=CDEAE=AE?BBC??@###########################################################",
            "TTATAAGCACTGCACATAACTTTCTCCCTAATCTTTACAACAATCNNNTNNNCNNANNNNNCNCAGGNNNNCNNGNNNNNGNCANTGNNNNNNNNNNNGT",
            "EFDFFEEC?BEEAEE=A?CCE??EEAEEEEEECECEDEAC5@;7?#######################################################",
        ),
    )
    bwsa = bwa.build_bws_array(pair, qtype="fastq-sanger", trim_qual=0)
    try:
        self.assertEqual(100, bwsa[0][0].clip_len)
        self.assertEqual(100, bwsa[1][0].clip_len)
    finally:
        # Release the arrays even when an assertion above fails; otherwise
        # a failing test would skip the free_seq calls.
        for seq_array in bwsa:
            bwa.free_seq(len(pair), seq_array)
def test_build_bws_array_with_illumina_q(self):
    # Build sequence arrays from fastq-illumina qualities with trim_qual=15
    # and check the resulting clip_len of each read in the pair.
    # NOTE(review): another method with this name is visible in this source;
    # if both sit in one class, only the later definition runs.
    pair = (
        # (name, read 1, quality 1, read 2, quality 2)
        (
            "pair1",
            "GCGTTAGTTTGTGGTGAAAGAAGCAAAATATTATGAATTTTGAAAATCTAGAGTCAATCTTCAAATTTCTTTTTATTCATTAATATCTCTGGCCTTTGTT",
            "TcLacaU_b_a\\aa`L\\a]X`dddYM^Z\\W]]W]WWbbYTZLZLZRI[VYQVWUU^b\\ccTc\\c^cYcYcZ^_ULUVUX[bY`^BBBBBBBBBBBBBBBB",
            "ATCAATCTAGGAGTTTGTATATACCACAAGAACAATGCTTGTCAAATCACTCACTTCTTTTCCCCTCTCTGGAGCCAGGCTTCGAGAAGTTGTATCCAGA",
            "B_a`aaTT__Q\\UQV^Z```\\VRKYQHUVIRLQQTTK]`^Z]]Y]_Y_Yb`da`^`aaac]eKdddfdefefecffffdaffffffefdedcffdeffff",
        ),
    )
    # Expected clip_len for read 1 and read 2 respectively.
    lengths = (85, 100)
    bwsa = bwa.build_bws_array(pair, qtype="fastq-illumina", trim_qual=15)
    try:
        self.assertEqual(lengths[0], bwsa[0][0].clip_len)
        self.assertEqual(lengths[1], bwsa[1][0].clip_len)
    finally:
        # Release the arrays even when an assertion above fails.
        for seq_array in bwsa:
            bwa.free_seq(len(pair), seq_array)
def test_build_bws_array_with_illumina_q(self):
    # With qtype="fastq-illumina" and trim_qual=15, the first read is expected
    # to have clip_len 85 and the second clip_len 100.
    # NOTE(review): another method with this name is visible in this source;
    # if both sit in one class, only the later definition runs.
    pair = (
        # (name, read 1, quality 1, read 2, quality 2)
        (
            "pair1",
            "GCGTTAGTTTGTGGTGAAAGAAGCAAAATATTATGAATTTTGAAAATCTAGAGTCAATCTTCAAATTTCTTTTTATTCATTAATATCTCTGGCCTTTGTT",
            "TcLacaU_b_a\\aa`L\\a]X`dddYM^Z\\W]]W]WWbbYTZLZLZRI[VYQVWUU^b\\ccTc\\c^cYcYcZ^_ULUVUX[bY`^BBBBBBBBBBBBBBBB",
            "ATCAATCTAGGAGTTTGTATATACCACAAGAACAATGCTTGTCAAATCACTCACTTCTTTTCCCCTCTCTGGAGCCAGGCTTCGAGAAGTTGTATCCAGA",
            "B_a`aaTT__Q\\UQV^Z```\\VRKYQHUVIRLQQTTK]`^Z]]Y]_Y_Yb`da`^`aaac]eKdddfdefefecffffdaffffffefdedcffdeffff",
        ),
    )
    lengths = (85, 100)
    bwsa = bwa.build_bws_array(pair, qtype="fastq-illumina", trim_qual=15)
    try:
        self.assertEqual(lengths[0], bwsa[0][0].clip_len)
        self.assertEqual(lengths[1], bwsa[1][0].clip_len)
    finally:
        # Cleanup must run whether or not the assertions pass, so the
        # arrays returned by build_bws_array are always handed to free_seq.
        for seq_array in bwsa:
            bwa.free_seq(len(pair), seq_array)
def test_build_bws_array_without_qual_str(self):
    # Build sequence arrays from records that carry None instead of quality
    # strings, then check names, sequences and the (empty) quality strings.

    # Make tuples substituting None for the quality strings in toy_data.
    # A list comprehension keeps `toy` a real list, so len(toy) below works
    # even where map() returns a lazy iterator.
    toy = [(t[0], t[1], None, t[3], None) for t in self.toy_data]
    bwsa = bwa.build_bws_array(toy)
    try:
        for i, record in enumerate(self.toy_data):
            name, read1, read2 = record[0], record[1], record[3]
            # verify name, sequence (which is reversed)
            self.assertEqual(name, bwsa[0][i].get_name())
            self.assertEqual(name, bwsa[1][i].get_name())
            self.assertEqual(read1, bwsa[0][i].get_seq()[::-1])
            self.assertEqual(read2, bwsa[1][i].get_seq()[::-1])
            # ensure that the returned quality string is empty
            self.assertEqual('', bwsa[0][i].get_qual_string())
            self.assertEqual('', bwsa[1][i].get_qual_string())
    finally:
        # Release the arrays even when an assertion above fails.
        for seq_array in bwsa:
            bwa.free_seq(len(toy), seq_array)
def test_trimming_min_length(self):
    # Check that quality trimming at q=15 does not clip a read below the
    # minimum trimmed length of 35.
    # NOTE(review): another method with this name is visible in this source;
    # if both sit in one class, only the later definition runs.
    pair = (
        # (name, r1, q1, r2, q2)
        (
            "seq_name",
            "NNNNNNNNNNNNACAGCTTTTGAGNGGGGCNCAGCCTCTTCCTTGTGGTGTTGCAGGACGGCAGGGAGTCAGTTGGGTTTCNACTCTTTAAGGACAGTGA",
            "####################################################################################################",
            "TTTTTATTTCTAGTGTTTATATTGATGAACAGAAATATACTGACATATTAACTTTTTTCATATAAATTTTTTCAAATTTTTGGTTAAGGCTTTTTCTGTC",
            "GGGGGF@EGGD;EEEGG??EDEEDAE==E?EEE??>B6C:=CDCBGFGFFFGFGGF=EF5<?=)=9B6?BAA-A:;;>;5*2*57AA>AA>AA>CA@CED",
        ),
    )
    # The first sequence should have a clipped length of 35 at q=15.
    # 35 is the minimum trimmed sequence length.
    bwsa = bwa.build_bws_array(pair, qtype="fastq-sanger", trim_qual=15)
    try:
        self.assertEqual(100, bwsa[0][0].full_len)
        self.assertEqual(35, bwsa[0][0].clip_len)
        self.assertEqual(35, bwsa[0][0].len)
        self.assertEqual(100, bwsa[1][0].clip_len)  # this one isn't clipped at q=15
    finally:
        # Release the arrays even when an assertion above fails.
        for seq_array in bwsa:
            bwa.free_seq(len(pair), seq_array)
def test_trimming_min_length(self):
    # Trim a pair at q=15 where read 1 has '#' at every quality position:
    # its clip_len and len are expected to stop at 35 while full_len stays
    # 100; read 2 is expected to keep clip_len 100.
    # NOTE(review): another method with this name is visible in this source;
    # if both sit in one class, only the later definition runs.
    pair = (
        # (name, r1, q1, r2, q2)
        (
            "seq_name",
            "NNNNNNNNNNNNACAGCTTTTGAGNGGGGCNCAGCCTCTTCCTTGTGGTGTTGCAGGACGGCAGGGAGTCAGTTGGGTTTCNACTCTTTAAGGACAGTGA",
            "####################################################################################################",
            "TTTTTATTTCTAGTGTTTATATTGATGAACAGAAATATACTGACATATTAACTTTTTTCATATAAATTTTTTCAAATTTTTGGTTAAGGCTTTTTCTGTC",
            "GGGGGF@EGGD;EEEGG??EDEEDAE==E?EEE??>B6C:=CDCBGFGFFFGFGGF=EF5<?=)=9B6?BAA-A:;;>;5*2*57AA>AA>AA>CA@CED",
        ),
    )
    # The first sequence should have a clipped length of 35 at q=15.
    # 35 is the minimum trimmed sequence length.
    bwsa = bwa.build_bws_array(pair, qtype="fastq-sanger", trim_qual=15)
    try:
        first_read = bwsa[0][0]
        self.assertEqual(100, first_read.full_len)
        self.assertEqual(35, first_read.clip_len)
        self.assertEqual(35, first_read.len)
        # this one isn't clipped at q=15
        self.assertEqual(100, bwsa[1][0].clip_len)
    finally:
        # Cleanup must run whether or not the assertions pass, so the
        # arrays returned by build_bws_array are always handed to free_seq.
        for seq_array in bwsa:
            bwa.free_seq(len(pair), seq_array)
def setUp(self):
    # Shared fixture: toy records and the sequence arrays built from them
    # with qtype="fastq-illumina".
    # NOTE(review): another setUp with the same data is visible in this
    # source; if both sit in one class, only the later definition runs.
    toy_rows = [
        ["name1", "CAAAT", "latte", "CAGAT", "acqua"],
        ["name2", "TTAGG", "beefy", "CTTGG", "caffe"],
    ]
    self.toy_data = toy_rows
    self.toy_bwsa = bwa.build_bws_array(toy_rows, qtype="fastq-illumina")