def test_tough_alignment_5(self):
    """
    Read '5' should yield exactly one alignment which runs to the end of the read and begins at
    the very start of the reference.
    """
    self.do_alignment('5', 0)
    read = self.aligned_reads['5']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '5')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 2792)
    self.assertGreater(alignment.scaled_score, 89.37)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertLess(abs(read_start - 5121), self.pos_margin_of_error)
    self.assertEqual(read_end, 6396)  # end of read
    self.assertEqual(alignment.ref_start_pos, 0)  # start of ref
    self.assertLess(abs(alignment.ref_end_pos - 1323), self.pos_margin_of_error)
def test_long_contained_read(self):
    """
    Read '2' should yield exactly one alignment which spans the whole read (positions 0 to
    52096) and lands inside the reference, at roughly 2986 to 57064.
    """
    self.do_alignment('2', 0)
    read = self.aligned_reads['2']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '2')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 122681)
    self.assertGreater(alignment.scaled_score, 91.19)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertEqual(read_start, 0)  # start of read
    self.assertEqual(read_end, 52096)  # end of read
    self.assertLess(abs(alignment.ref_start_pos - 2986), self.pos_margin_of_error)
    self.assertLess(abs(alignment.ref_end_pos - 57064), self.pos_margin_of_error)
def test_medium_contained_read(self):
    """
    Read '1' should yield exactly one alignment which spans the whole read (positions 0 to
    7360) and lands inside the reference, at roughly 68597 to 76202.
    """
    self.do_alignment('1', 0)
    read = self.aligned_reads['1']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '1')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 16608)
    self.assertGreater(alignment.scaled_score, 90.12)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertEqual(read_start, 0)  # start of read
    self.assertEqual(read_end, 7360)  # end of read
    self.assertLess(abs(alignment.ref_start_pos - 68597), self.pos_margin_of_error)
    self.assertLess(abs(alignment.ref_end_pos - 76202), self.pos_margin_of_error)
def test_short_contained_read(self):
    """
    Read '0' should yield exactly one alignment which spans the whole read (positions 0 to
    608) and lands inside the reference, at roughly 31040 to 31679.
    """
    self.do_alignment('0', 0)
    read = self.aligned_reads['0']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '0')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 1418)
    self.assertGreater(alignment.scaled_score, 90.78)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertEqual(read_start, 0)  # start of read
    self.assertEqual(read_end, 608)  # end of read
    self.assertLess(abs(alignment.ref_start_pos - 31040), self.pos_margin_of_error)
    self.assertLess(abs(alignment.ref_end_pos - 31679), self.pos_margin_of_error)
def test_tough_alignment_12(self):
    """
    Repeats at one end of the read.
    """
    self.do_alignment('12', 0)
    read = self.aligned_reads['12']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '12')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 3805)
    self.assertGreater(alignment.scaled_score, 92.83)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertLess(abs(read_start - 3037), self.pos_margin_of_error)
    self.assertEqual(read_end, 4611)  # end of read
    self.assertEqual(alignment.ref_start_pos, 0)  # start of ref
    self.assertLess(abs(alignment.ref_end_pos - 1538), self.pos_margin_of_error)
def test_tough_alignment_11(self):
    """
    Repeats at one end of the read.
    """
    self.do_alignment('11', 0)
    read = self.aligned_reads['11']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '11')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 6996)
    self.assertGreater(alignment.scaled_score, 92.95)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertEqual(read_start, 0)  # start of read
    self.assertLess(abs(read_end - 2863), self.pos_margin_of_error)
    self.assertEqual(alignment.ref_start_pos, 0)  # start of ref
    self.assertLess(abs(alignment.ref_end_pos - 2818), self.pos_margin_of_error)
def test_tough_alignment_10(self):
    """
    This is a very low quality read.
    """
    self.do_alignment('10', 0)
    read = self.aligned_reads['10']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '10')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 4978)
    self.assertGreater(alignment.scaled_score, 71.07)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertEqual(read_start, 0)  # start of read
    self.assertEqual(read_end, 10923)  # end of read
    self.assertLess(abs(alignment.ref_start_pos - 2001), self.pos_margin_of_error)
    self.assertLess(abs(alignment.ref_end_pos - 12186), self.pos_margin_of_error)
def test_tough_alignment_8(self):
    """
    This has a weird change in slope - the read has a good part and a bad part. Ideally the
    alignment should get the whole thing!
    """
    self.do_alignment('8', 0)
    read = self.aligned_reads['8']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '8')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 2812)
    self.assertGreater(alignment.scaled_score, 76.36)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertLess(abs(read_start - 681), self.pos_margin_of_error)
    self.assertEqual(read_end, 3808)  # end of read
    self.assertLess(abs(alignment.ref_start_pos - 19594), self.pos_margin_of_error)
    self.assertEqual(alignment.ref_end_pos, 21983)  # end of ref
def test_tough_alignment_1(self):
    """
    This read goes through a repetitive area, which means that the densest area of common
    k-mers is not on the correct alignment line. This means more than one line tracing is
    required to get it right.
    """
    self.do_alignment('1', 0)
    read = self.aligned_reads['1']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '1')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 20740)
    self.assertGreater(alignment.scaled_score, 91.02)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertLess(abs(read_start - 10785), self.pos_margin_of_error)
    self.assertLess(abs(read_end - 19629), self.pos_margin_of_error)
    self.assertEqual(alignment.ref_start_pos, 0)  # start of ref
    self.assertEqual(alignment.ref_end_pos, 9241)  # end of ref
def test_tough_alignment_7(self):
    """
    This one misses the right alignment on lower sensitivities because the line tracing never
    gets 'lost' but is still the wrong one. Higher sensitivities find the correct alignment by
    trying other starting points.
    """
    self.do_alignment('7', 0)
    read = self.aligned_reads['7']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '7')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 125555)
    self.assertGreater(alignment.scaled_score, 88.92)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertEqual(read_start, 0)  # start of read
    self.assertLess(abs(read_end - 57721), self.pos_margin_of_error)
    self.assertLess(abs(alignment.ref_start_pos - 35024), self.pos_margin_of_error)
    self.assertEqual(alignment.ref_end_pos, 95758)  # end of ref
def test_tough_alignment_2(self):
    """
    This read goes through a repetitive area, which means that the densest area of common
    k-mers is not on the correct alignment line. This means more than one line tracing is
    required to get it right.
    """
    self.do_alignment('2', 0)
    read = self.aligned_reads['2']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '2')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 34439)
    self.assertGreater(alignment.scaled_score, 90.35)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertLess(abs(read_start - 22493), self.pos_margin_of_error)
    self.assertEqual(read_end, 37581)  # end of read
    self.assertEqual(alignment.ref_start_pos, 0)  # start of ref
    self.assertLess(abs(alignment.ref_end_pos - 15673), self.pos_margin_of_error)
def test_tough_alignment_0(self):
    """
    The beginning of the reference in this case is repetitive, which was able to throw off
    Seqan's global chaining algorithm, resulting in an awkward alignment. I think I fixed this
    by limiting Seqan to the seeds which are near the diagonals of line tracing points.
    """
    self.do_alignment('0', 0)
    read = self.aligned_reads['0']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '0')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 126074)
    self.assertGreater(alignment.scaled_score, 91.07)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertLess(abs(read_start - 18662), self.pos_margin_of_error)
    self.assertEqual(read_end, 72402)  # end of read
    self.assertEqual(alignment.ref_start_pos, 0)  # start of ref
    self.assertLess(abs(alignment.ref_end_pos - 55814), self.pos_margin_of_error)
def test_tough_alignment_13(self):
    """
    This read caused a crash before I added the getMaxSeedChainGapArea function in
    semi_global_align.cpp. It left a very large gap in the global seed chain which caused some
    problem with Seqan.
    """
    self.do_alignment('13', 1)
    read = self.aligned_reads['13']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '13')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 101608)
    self.assertGreater(alignment.scaled_score, 88.49)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertEqual(read_start, 0)  # start of read
    self.assertEqual(read_end, 46710)  # end of read
    self.assertLess(abs(alignment.ref_start_pos - 109308), self.pos_margin_of_error)
    self.assertLess(abs(alignment.ref_end_pos - 159675), self.pos_margin_of_error)
def test_tough_alignment_4(self):
    """
    Like so many of these tough ones, this case has a read which enters a repetitive region
    after overlapping with the end of the contig. Before I fixed some aspects of the alignment,
    the line tracing was getting caught on some of these spurious repeats instead of sticking
    to the main line.
    """
    self.do_alignment('4', 0)
    read = self.aligned_reads['4']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '4')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 58531)
    self.assertGreater(alignment.scaled_score, 86.47)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertLess(abs(read_start - 9582), self.pos_margin_of_error)
    self.assertEqual(read_end, 39544)  # end of read
    self.assertEqual(alignment.ref_start_pos, 0)  # start of ref
    self.assertLess(abs(alignment.ref_end_pos - 31277), self.pos_margin_of_error)
def test_tough_alignment_3(self):
    """
    The first line tracing of this alignment is bad but uses all the points. This previously
    caused a crash when the program tried to do a second line trace but there were no points
    left to find a starting position.
    """
    self.do_alignment('3', 0)
    read = self.aligned_reads['3']
    self.assertEqual(len(read.alignments), 1)
    alignment = read.alignments[0]
    self.assertEqual(alignment.read.name, '3')

    # Comparison-specific asserts report the offending values when they fail.
    self.assertGreaterEqual(alignment.raw_score, 786)
    self.assertGreater(alignment.scaled_score, 75.19)

    read_start, read_end = alignment.read_start_end_positive_strand()
    self.assertEqual(read_start, 0)  # start of read
    self.assertEqual(read_end, 872)  # end of read
    self.assertLess(abs(alignment.ref_start_pos - 41783), self.pos_margin_of_error)
    self.assertLess(abs(alignment.ref_end_pos - 42680), self.pos_margin_of_error)