Example #1
0
    def test_bwa_init_sequences_no_trim(self):
        """Check ``bwa.init_sequences`` with trimming disabled (trim_qual=0).

        The first three fields of every ``self.toy_data`` row (name,
        sequence, quality string) are packed into a ctypes table of C
        strings and handed to ``bwa.init_sequences`` together with the
        "fastq-illumina" quality offset.  Each resulting record must be
        untrimmed, carry the original name, hold the reversed sequence and
        have qualities that map back to the original quality string.
        """
        n_seqs = len(self.toy_data)
        row_type = ct.c_char_p * 3
        pointers = (row_type * n_seqs)()
        q_offset = bwa.Q_OFFSET["fastq-illumina"]
        for i, row in enumerate(self.toy_data):
            for j in xrange(3):
                pointers[i][j] = row[j]
        array = bwa.init_sequences(ct.cast(pointers, bwa.p_char_p),
                                   n_seqs, q_offset, 0)

        # Free the native array in a ``finally`` clause so that a failing
        # assertion does not leak it.
        try:
            # The test expects the stored quality values to give back the
            # original characters once this offset difference is added.
            resulting_qoffset = q_offset - bwa.Q_OFFSET["fastq-sanger"]
            for i, row in enumerate(self.toy_data):
                seq = array[i]
                expected_len = len(row[1])
                self.assertEqual(-1, seq.tid)
                # no trimming requested: all three lengths match the read
                self.assertEqual(expected_len, seq.len)
                self.assertEqual(expected_len, seq.full_len)
                self.assertEqual(expected_len, seq.clip_len)
                self.assertEqual(row[0], seq.get_name())
                # the sequence is expected back reversed
                self.assertEqual(row[1][::-1], seq.get_seq())
                self.assertEqual(
                    row[2], ''.join([
                        chr(seq.qual[k] + resulting_qoffset)
                        for k in xrange(seq.full_len)
                    ]))
        finally:
            bwa.free_seq(n_seqs, array)
Example #2
0
	def test_init_sequences_with_trimming(self):
		"""Check ``bwa.init_sequences`` with a trim quality of 15.

		A single all-``N`` read whose quality string consists only of
		``#`` characters is passed in with the "fastq-sanger" offset; the
		resulting record is expected to have ``clip_len`` equal to 35.
		"""
		data = [
			("seq1",
			 "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN",
			 "####################################################################################################"),
		]
		row_type = ct.c_char_p * 3
		pointers = (row_type * len(data))()
		q_offset = bwa.Q_OFFSET["fastq-sanger"]
		for i, row in enumerate(data):
			for j in xrange(3):
				pointers[i][j] = row[j]
		array = bwa.init_sequences(ct.cast(pointers, bwa.p_char_p), len(data), q_offset, 15)
		# Free the native array even if the assertion fails.
		try:
			self.assertEqual(35, array[0].clip_len)
		finally:
			bwa.free_seq(len(data), array)
Example #3
0
 def test_build_bws_array_with_trim_qual_zero(self):
     """Check that ``trim_qual=0`` leaves both reads of a pair unclipped.

     Both reads end in a long run of ``#`` qualities, yet with a trim
     quality of 0 the test expects ``clip_len`` to be 100 for each.
     """
     pair = (  # (name, r1, q1, r2, q2)
         ("pair1",
          "ATNCACCTGCCTTGGCCTCGCAAAGTGCTGGGATTACAGGCATGANNNNNNNNNNNNNNNNNNCNGTNNNNNNNNNNNNNTNCANATNNNNNNNNNNNTG",
          "==#@7::;;?FF?FFGGGFFGGGGE=CDEAE=AE?BBC??@###########################################################",
          "TTATAAGCACTGCACATAACTTTCTCCCTAATCTTTACAACAATCNNNTNNNCNNANNNNNCNCAGGNNNNCNNGNNNNNGNCANTGNNNNNNNNNNNGT",
          "EFDFFEEC?BEEAEE=A?CCE??EEAEEEEEECECEDEAC5@;7?#######################################################"
          ), )
     bwsa = bwa.build_bws_array(pair, qtype="fastq-sanger", trim_qual=0)
     # Free the native arrays even if an assertion fails.
     try:
         # bwsa[0] holds the first reads, bwsa[1] the second reads
         self.assertEqual(100, bwsa[0][0].clip_len)
         self.assertEqual(100, bwsa[1][0].clip_len)
     finally:
         for seq_array in bwsa:
             bwa.free_seq(len(pair), seq_array)
Example #4
0
	def test_build_bws_array_with_illumina_q(self):
		"""Check quality trimming on a pair with Illumina-encoded qualities.

		Builds the arrays with ``qtype="fastq-illumina"`` and
		``trim_qual=15`` and expects the first read to be clipped to 85
		bases while the second keeps a ``clip_len`` of 100.
		"""
		pair = (  # (name, r1, q1, r2, q2)
			("pair1",
			"GCGTTAGTTTGTGGTGAAAGAAGCAAAATATTATGAATTTTGAAAATCTAGAGTCAATCTTCAAATTTCTTTTTATTCATTAATATCTCTGGCCTTTGTT",
			"TcLacaU_b_a\\aa`L\\a]X`dddYM^Z\\W]]W]WWbbYTZLZLZRI[VYQVWUU^b\\ccTc\\c^cYcYcZ^_ULUVUX[bY`^BBBBBBBBBBBBBBBB",
			"ATCAATCTAGGAGTTTGTATATACCACAAGAACAATGCTTGTCAAATCACTCACTTCTTTTCCCCTCTCTGGAGCCAGGCTTCGAGAAGTTGTATCCAGA",
			"B_a`aaTT__Q\\UQV^Z```\\VRKYQHUVIRLQQTTK]`^Z]]Y]_Y_Yb`da`^`aaac]eKdddfdefefecffffdaffffffefdedcffdeffff"),
		)
		# expected clip_len of read 1 and read 2 respectively
		lengths = (85, 100)
		bwsa = bwa.build_bws_array(pair, qtype="fastq-illumina", trim_qual=15)
		# Free the native arrays even if an assertion fails.
		try:
			self.assertEqual(lengths[0], bwsa[0][0].clip_len)
			self.assertEqual(lengths[1], bwsa[1][0].clip_len)
		finally:
			for seq_array in bwsa:
				bwa.free_seq(len(pair), seq_array)
Example #5
0
	def test_build_bws_array_with_trim_qual_zero(self):
		"""Check that ``trim_qual=0`` leaves both reads of a pair unclipped.

		Both reads end in a long run of ``#`` qualities, yet with a trim
		quality of 0 the test expects ``clip_len`` to be 100 for each.
		"""
		pair = (  # (name, r1, q1, r2, q2)
			("pair1",
			 "ATNCACCTGCCTTGGCCTCGCAAAGTGCTGGGATTACAGGCATGANNNNNNNNNNNNNNNNNNCNGTNNNNNNNNNNNNNTNCANATNNNNNNNNNNNTG",
			 "==#@7::;;?FF?FFGGGFFGGGGE=CDEAE=AE?BBC??@###########################################################",
			 "TTATAAGCACTGCACATAACTTTCTCCCTAATCTTTACAACAATCNNNTNNNCNNANNNNNCNCAGGNNNNCNNGNNNNNGNCANTGNNNNNNNNNNNGT",
			 "EFDFFEEC?BEEAEE=A?CCE??EEAEEEEEECECEDEAC5@;7?#######################################################"
			),
		)
		bwsa = bwa.build_bws_array(pair, qtype="fastq-sanger", trim_qual=0)
		# Free the native arrays even if an assertion fails.
		try:
			# bwsa[0] holds the first reads, bwsa[1] the second reads
			self.assertEqual(100, bwsa[0][0].clip_len)
			self.assertEqual(100, bwsa[1][0].clip_len)
		finally:
			for seq_array in bwsa:
				bwa.free_seq(len(pair), seq_array)
Example #6
0
 def test_build_bws_array_with_illumina_q(self):
     """Check quality trimming on a pair with Illumina-encoded qualities.

     Builds the arrays with ``qtype="fastq-illumina"`` and
     ``trim_qual=15`` and expects the first read to be clipped to 85
     bases while the second keeps a ``clip_len`` of 100.
     """
     pair = ((  # (name, r1, q1, r2, q2)
         "pair1",
         "GCGTTAGTTTGTGGTGAAAGAAGCAAAATATTATGAATTTTGAAAATCTAGAGTCAATCTTCAAATTTCTTTTTATTCATTAATATCTCTGGCCTTTGTT",
         "TcLacaU_b_a\\aa`L\\a]X`dddYM^Z\\W]]W]WWbbYTZLZLZRI[VYQVWUU^b\\ccTc\\c^cYcYcZ^_ULUVUX[bY`^BBBBBBBBBBBBBBBB",
         "ATCAATCTAGGAGTTTGTATATACCACAAGAACAATGCTTGTCAAATCACTCACTTCTTTTCCCCTCTCTGGAGCCAGGCTTCGAGAAGTTGTATCCAGA",
         "B_a`aaTT__Q\\UQV^Z```\\VRKYQHUVIRLQQTTK]`^Z]]Y]_Y_Yb`da`^`aaac]eKdddfdefefecffffdaffffffefdedcffdeffff"
     ), )
     # expected clip_len of read 1 and read 2 respectively
     lengths = (85, 100)
     bwsa = bwa.build_bws_array(pair, qtype="fastq-illumina", trim_qual=15)
     # Free the native arrays even if an assertion fails.
     try:
         self.assertEqual(lengths[0], bwsa[0][0].clip_len)
         self.assertEqual(lengths[1], bwsa[1][0].clip_len)
     finally:
         for seq_array in bwsa:
             bwa.free_seq(len(pair), seq_array)
Example #7
0
	def test_build_bws_array_without_qual_str(self):
		"""Check ``bwa.build_bws_array`` on pairs without quality strings.

		Names and sequences must match ``self.toy_data`` (sequences are
		compared after reversing what ``get_seq()`` returns) and the
		quality string reported for every record must be empty.
		"""
		# make tuples substituting None for the quality strings in toy_data;
		# a real list is built because len(toy) is needed further down
		toy = [(t[0], t[1], None, t[3], None) for t in self.toy_data]
		bwsa = bwa.build_bws_array(toy)
		# Free the native arrays even if an assertion fails.
		try:
			for i, row in enumerate(self.toy_data):
				# verify name, sequence (which is reversed)
				self.assertEqual(row[0], bwsa[0][i].get_name())
				self.assertEqual(row[0], bwsa[1][i].get_name())
				self.assertEqual(row[1], bwsa[0][i].get_seq()[::-1])
				self.assertEqual(row[3], bwsa[1][i].get_seq()[::-1])
				# ensure that the returned quality string is empty
				self.assertEqual('', bwsa[0][i].get_qual_string())
				self.assertEqual('', bwsa[1][i].get_qual_string())
		finally:
			for seq_array in bwsa:
				bwa.free_seq(len(toy), seq_array)
Example #8
0
 def test_build_bws_array_without_qual_str(self):
     """Check ``bwa.build_bws_array`` on pairs without quality strings.

     Names and sequences must match ``self.toy_data`` (sequences are
     compared after reversing what ``get_seq()`` returns) and the
     quality string reported for every record must be empty.
     """
     # make tuples substituting None for the quality strings in toy_data;
     # a real list is built because len(toy) is needed further down
     toy = [(t[0], t[1], None, t[3], None) for t in self.toy_data]
     bwsa = bwa.build_bws_array(toy)
     # Free the native arrays even if an assertion fails.
     try:
         for i, row in enumerate(self.toy_data):
             # verify name, sequence (which is reversed)
             self.assertEqual(row[0], bwsa[0][i].get_name())
             self.assertEqual(row[0], bwsa[1][i].get_name())
             self.assertEqual(row[1], bwsa[0][i].get_seq()[::-1])
             self.assertEqual(row[3], bwsa[1][i].get_seq()[::-1])
             # ensure that the returned quality string is empty
             self.assertEqual('', bwsa[0][i].get_qual_string())
             self.assertEqual('', bwsa[1][i].get_qual_string())
     finally:
         for seq_array in bwsa:
             bwa.free_seq(len(toy), seq_array)
Example #9
0
	def test_trimming_min_length(self):
		"""Check that quality trimming stops at the minimum clipped length.

		The first read has ``#`` qualities throughout, so trimming at
		q=15 is expected to cut it down to 35 bases while ``full_len``
		stays at 100; the second read is expected to remain unclipped.
		"""
		pair = ( # (name, r1, q1, r2, q2)
			("seq_name",
			 "NNNNNNNNNNNNACAGCTTTTGAGNGGGGCNCAGCCTCTTCCTTGTGGTGTTGCAGGACGGCAGGGAGTCAGTTGGGTTTCNACTCTTTAAGGACAGTGA",
			 "####################################################################################################",
			 "TTTTTATTTCTAGTGTTTATATTGATGAACAGAAATATACTGACATATTAACTTTTTTCATATAAATTTTTTCAAATTTTTGGTTAAGGCTTTTTCTGTC",
			 "GGGGGF@EGGD;EEEGG??EDEEDAE==E?EEE??>B6C:=CDCBGFGFFFGFGGF=EF5<?=)=9B6?BAA-A:;;>;5*2*57AA>AA>AA>CA@CED"),
		)
		# The first sequence should have a clipped length of 35 at q=15.
		# 35 is the minimum trimmed sequence length.
		bwsa = bwa.build_bws_array(pair, qtype="fastq-sanger", trim_qual=15)
		# Free the native arrays even if an assertion fails.
		try:
			first_read = bwsa[0][0]
			self.assertEqual(100, first_read.full_len)
			self.assertEqual(35, first_read.clip_len)
			self.assertEqual(35, first_read.len)
			self.assertEqual(100, bwsa[1][0].clip_len) # this one isn't clipped at q=15
		finally:
			for seq_array in bwsa:
				bwa.free_seq(len(pair), seq_array)
Example #10
0
 def test_init_sequences_with_trimming(self):
     """Check ``bwa.init_sequences`` with a trim quality of 15.

     A single all-``N`` read whose quality string consists only of
     ``#`` characters is passed in with the "fastq-sanger" offset; the
     resulting record is expected to have ``clip_len`` equal to 35.
     """
     data = [(
         "seq1",
         "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN",
         "####################################################################################################"
     )]
     row_type = ct.c_char_p * 3
     pointers = (row_type * len(data))()
     q_offset = bwa.Q_OFFSET["fastq-sanger"]
     for i, row in enumerate(data):
         for j in xrange(3):
             pointers[i][j] = row[j]
     array = bwa.init_sequences(ct.cast(pointers, bwa.p_char_p), len(data),
                                q_offset, 15)
     # Free the native array even if the assertion fails.
     try:
         self.assertEqual(35, array[0].clip_len)
     finally:
         bwa.free_seq(len(data), array)
Example #11
0
 def test_trimming_min_length(self):
     """Check that quality trimming stops at the minimum clipped length.

     The first read has ``#`` qualities throughout, so trimming at
     q=15 is expected to cut it down to 35 bases while ``full_len``
     stays at 100; the second read is expected to remain unclipped.
     """
     pair = (  # (name, r1, q1, r2, q2)
         ("seq_name",
          "NNNNNNNNNNNNACAGCTTTTGAGNGGGGCNCAGCCTCTTCCTTGTGGTGTTGCAGGACGGCAGGGAGTCAGTTGGGTTTCNACTCTTTAAGGACAGTGA",
          "####################################################################################################",
          "TTTTTATTTCTAGTGTTTATATTGATGAACAGAAATATACTGACATATTAACTTTTTTCATATAAATTTTTTCAAATTTTTGGTTAAGGCTTTTTCTGTC",
          "GGGGGF@EGGD;EEEGG??EDEEDAE==E?EEE??>B6C:=CDCBGFGFFFGFGGF=EF5<?=)=9B6?BAA-A:;;>;5*2*57AA>AA>AA>CA@CED"
          ), )
     # The first sequence should have a clipped length of 35 at q=15.
     # 35 is the minimum trimmed sequence length.
     bwsa = bwa.build_bws_array(pair, qtype="fastq-sanger", trim_qual=15)
     # Free the native arrays even if an assertion fails.
     try:
         first_read = bwsa[0][0]
         self.assertEqual(100, first_read.full_len)
         self.assertEqual(35, first_read.clip_len)
         self.assertEqual(35, first_read.len)
         # this one isn't clipped at q=15
         self.assertEqual(100, bwsa[1][0].clip_len)
     finally:
         for seq_array in bwsa:
             bwa.free_seq(len(pair), seq_array)
Example #12
0
	def test_bwa_init_sequences_no_trim(self):
		"""Check ``bwa.init_sequences`` with trimming disabled (trim_qual=0).

		The first three fields of every ``self.toy_data`` row (name,
		sequence, quality string) are packed into a ctypes table of C
		strings and handed to ``bwa.init_sequences`` together with the
		"fastq-illumina" quality offset.  Each resulting record must be
		untrimmed, carry the original name, hold the reversed sequence and
		have qualities that map back to the original quality string.
		"""
		n_seqs = len(self.toy_data)
		row_type = ct.c_char_p * 3
		pointers = (row_type * n_seqs)()
		q_offset = bwa.Q_OFFSET["fastq-illumina"]
		for i, row in enumerate(self.toy_data):
			for j in xrange(3):
				pointers[i][j] = row[j]
		array = bwa.init_sequences(ct.cast(pointers, bwa.p_char_p), n_seqs, q_offset, 0)

		# Free the native array in a ``finally`` clause so that a failing
		# assertion does not leak it.
		try:
			# The test expects the stored quality values to give back the
			# original characters once this offset difference is added.
			resulting_qoffset = q_offset - bwa.Q_OFFSET["fastq-sanger"]
			for i, row in enumerate(self.toy_data):
				seq = array[i]
				expected_len = len(row[1])
				self.assertEqual(-1, seq.tid)
				# no trimming requested: all three lengths match the read
				self.assertEqual(expected_len, seq.len)
				self.assertEqual(expected_len, seq.full_len)
				self.assertEqual(expected_len, seq.clip_len)
				self.assertEqual(row[0], seq.get_name())
				# the sequence is expected back reversed
				self.assertEqual(row[1][::-1], seq.get_seq())
				rebuilt_qual = ''.join(
					[chr(seq.qual[k] + resulting_qoffset) for k in xrange(seq.full_len)])
				self.assertEqual(row[2], rebuilt_qual)
		finally:
			bwa.free_seq(n_seqs, array)
Example #13
0
	def tearDown(self):
		"""Hand every array in ``self.toy_bwsa`` back to ``bwa.free_seq``."""
		for bws in self.toy_bwsa:
			# each array is freed with the number of entries in toy_data
			n_records = len(self.toy_data)
			bwa.free_seq(n_records, bws)
Example #14
0
 def tearDown(self):
     """Hand every array in ``self.toy_bwsa`` back to ``bwa.free_seq``."""
     for bws in self.toy_bwsa:
         # each array is freed with the number of entries in toy_data
         n_records = len(self.toy_data)
         bwa.free_seq(n_records, bws)