def testDebugReportsOnReverseRead(self):
    # Request a debug report for ('test', 10). In this fixture the reverse
    # reads start at 5 with 8M, and their sixth base is A with quality I in
    # one pair and H in the other (assuming 1-based positions - TODO confirm).
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I2M\t=\t1\t8\tACTGGGAG\tJJJJJJJJ\n"
        "test1\t147\ttest\t5\t44\t8M\t=\t1\t-8\tGACCCAAC\tJJJJJIJJ\n"
        "test1\t99\ttest\t1\t44\t3M3I2M\t=\t1\t12\tATTGGGAG\tJJJJJJJJ\n"
        "test1\t147\ttest\t5\t44\t8M\t=\t1\t-12\tGACCCAAC\tJJJJJHJJ\n"
    )
    requested = {('test', 10): None}

    remap.sam_to_conseqs(StringIO.StringIO(sam_text),
                         debug_reports=requested)

    # The None placeholder is expected to be replaced by the report text.
    self.assertDictEqual({('test', 10): 'H{A: 2}, I{A: 1}'}, requested)
def testDebugReports(self):
    # Request a debug report for ('test', 2). The second base of each read is
    # C with quality I in the first pair and T with quality H in the second.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I9M\t=\t1\t12\tACTGGGAGACCCAAC\tJIJJJJJJJJJJJJJ\n"
        "test1\t147\ttest\t1\t44\t3M3I9M\t=\t1\t-12\tACTGGGAGACCCAAC\tJIJJJJJJJJJJJJJ\n"
        "test1\t99\ttest\t1\t44\t3M3I9M\t=\t1\t12\tATTGGGAGACCCAAC\tJHJJJJJJJJJJJJJ\n"
        "test1\t147\ttest\t1\t44\t3M3I9M\t=\t1\t-12\tATTGGGAGACCCAAC\tJHJJJJJJJJJJJJJ\n"
    )
    requested = {('test', 2): None}

    remap.sam_to_conseqs(StringIO.StringIO(sam_text),
                         debug_reports=requested)

    # The None placeholder is expected to be replaced by the report text.
    self.assertDictEqual({('test', 2): 'H{C: 1, T: 1}, I{C: 1}'}, requested)
def testDebugReports(self):
    # Request a debug report for ('test', 2). The second base of each read is
    # C with quality I in the first pair and T with quality H in the second.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I9M\t=\t1\t12\tACTGGGAGACCCAAC\tJIJJJJJJJJJJJJJ\n"
        "test1\t147\ttest\t1\t44\t3M3I9M\t=\t1\t-12\tACTGGGAGACCCAAC\tJIJJJJJJJJJJJJJ\n"
        "test1\t99\ttest\t1\t44\t3M3I9M\t=\t1\t12\tATTGGGAGACCCAAC\tJHJJJJJJJJJJJJJ\n"
        "test1\t147\ttest\t1\t44\t3M3I9M\t=\t1\t-12\tATTGGGAGACCCAAC\tJHJJJJJJJJJJJJJ\n"
    )
    requested = {('test', 2): None}

    remap.sam_to_conseqs(StringIO(sam_text), debug_reports=requested)

    # The None placeholder is expected to be replaced by the report text.
    self.assertDictEqual({('test', 2): 'H{C: 1, T: 1}, I{C: 1}'}, requested)
def testDebugReportsOnReverseRead(self):
    # Request a debug report for ('test', 10). In this fixture the reverse
    # reads start at 5 with 8M, and their sixth base is A with quality I in
    # one pair and H in the other (assuming 1-based positions - TODO confirm).
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I2M\t=\t1\t8\tACTGGGAG\tJJJJJJJJ\n"
        "test1\t147\ttest\t5\t44\t8M\t=\t1\t-8\tGACCCAAC\tJJJJJIJJ\n"
        "test1\t99\ttest\t1\t44\t3M3I2M\t=\t1\t12\tATTGGGAG\tJJJJJJJJ\n"
        "test1\t147\ttest\t5\t44\t8M\t=\t1\t-12\tGACCCAAC\tJJJJJHJJ\n"
    )
    requested = {('test', 10): None}

    remap.sam_to_conseqs(StringIO(sam_text), debug_reports=requested)

    # The None placeholder is expected to be replaced by the report text.
    self.assertDictEqual({('test', 10): 'H{A: 2}, I{A: 1}'}, requested)
def testReverseLeftOfForward(self):
    # The forward read (flag 99) sits at position 2 while its reverse mate
    # (flag 147) sits at position 1; expect both bases in reference order.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t2\t44\t1M\t=\t1\t1\tC\tJ\n"
        "test1\t147\ttest\t1\t44\t1M\t=\t2\t-1\tA\tJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'AC'}, result)
def testDeletion(self):
    # A single read with a three-base deletion (3M3D3M); the expected
    # consensus contains only the six read bases, with no gap characters.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAGGG'}, result)
def testHeaderFields(self):
    # The @SQ header carries an extra field (with spaces and a colon in its
    # value) ahead of SN; the reference name 'test' must still be picked up.
    sam_text = (
        "@SQ\tOF:other field: ignored\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA'}, result)
def testSeedsBothConverged(self):
    """ Both references are now closer to the other seed than the start.

    Don't drop both. Keep test because it has more reads.
    """
    # SAM columns: qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen,
    # seq, qual
    # NOTE(review): the 8-base reads carry 10-character quality strings;
    # left exactly as in the original fixture.
    sam_text = (
        "@SQ\tSN:test\tSN:other\tSN:unrelated\n"
        "test1\t99\ttest\t1\t44\t8M\t=\t1\t10\tAAGCCGTA\tJJJJJJJJJJ\n"
        "test2\t99\ttest\t1\t44\t8M\t=\t1\t10\tAAGCCGTA\tJJJJJJJJJJ\n"
        "other1\t99\tother\t1\t44\t8M\t=\t1\t10\tATGAAGTA\tJJJJJJJJJJ\n"
        "unrelated1\t99\tunrelated\t1\t44\t9M\t=\t1\t10\tGGGTTTGGG\tJJJJJJJJJ\n"
    )
    seeds = {
        'test': 'ATGAAGTA',
        'other': 'AAGCCGAA',
        'unrelated': 'GGGTTTGGG',
    }
    wanted_conseqs = {'test': 'AAGCCGTA', 'unrelated': 'GGGTTTGGG'}
    wanted_distances = {
        'test': {'seed_dist': 3, 'other_dist': 1, 'other_seed': 'other'},
        'other': {'seed_dist': 5, 'other_dist': 0, 'other_seed': 'test'},
        'unrelated': {'seed_dist': 0, 'other_dist': 7, 'other_seed': 'test'},
    }
    # Filled in by sam_to_conseqs through the distance_report argument.
    reported = {}

    result = remap.sam_to_conseqs(
        StringIO(sam_text),
        seeds=seeds,
        original_seeds=seeds,  # deliberately the very same dict object
        is_filtered=True,
        distance_report=reported,
    )

    self.maxDiff = 1000
    self.assertEqual(wanted_distances, reported)
    self.assertEqual(wanted_conseqs, result)
def testDeletionWithFrameShift(self):
    # A one-base deletion (3M1D3M) is expected to show up as a single '-'
    # in the consensus.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M1D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA-GGG'}, result)
def testSoftClip(self):
    # CIGAR 3S5M1S: only the five matched bases in the middle of the read
    # are expected in the consensus.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3S5M1S\t=\t1\t9\tACAGGGAGA\tJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'GGGAG'}, result)
def testSimpleInsertion(self):
    # CIGAR 3M3I3M: the three inserted bases (GGG) are expected to be kept,
    # so the consensus equals the full read.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I3M\t=\t1\t9\tACAGGGAGA\tJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAGGGAGA'}, result)
def testSeedsConverged(self):
    # SAM columns: qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen,
    # seq, qual
    # One read per reference; with is_filtered=True only 'test' is expected
    # to survive, and the distance report should describe all three.
    # NOTE(review): the 10-base reads carry 12-character quality strings;
    # left exactly as in the original fixture.
    sam_text = (
        "@SQ\tSN:test\tSN:other\tSN:wayoff\n"
        "test1\t99\ttest\t1\t44\t10M\t=\t1\t10\tATGAGGAGTA\tJJJJJJJJJJJJ\n"
        "other1\t99\tother\t1\t44\t10M\t=\t1\t10\tATGACCAGTA\tJJJJJJJJJJJJ\n"
        "wayoff1\t99\twayoff\t1\t44\t10M\t=\t1\t10\tATGAGGGTAC\tJJJJJJJJJJJJ\n"
    )
    seeds = {'test': 'ATGAAGTA', 'other': 'AAGCCGAA', 'wayoff': 'TCATGTAC'}
    wanted_conseqs = {'test': 'ATGAGGAGTA'}
    wanted_distances = {
        'test': {'seed_dist': 2, 'other_dist': 5, 'other_seed': 'other'},
        'other': {'seed_dist': 4, 'other_dist': 2, 'other_seed': 'test'},
        'wayoff': {'seed_dist': 4, 'other_dist': 3, 'other_seed': 'test'},
    }
    # Filled in by sam_to_conseqs through the distance_report argument.
    reported = {}

    result = remap.sam_to_conseqs(
        StringIO.StringIO(sam_text),
        seeds=seeds,
        is_filtered=True,
        distance_report=reported,
    )

    self.maxDiff = 1000
    self.assertEqual(wanted_conseqs, result)
    self.assertEqual(wanted_distances, reported)
def testLowQualityAtEnd(self):
    # The last base has quality '/', which with quality_cutoff=32 is
    # expected to be reported as N rather than trimmed.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACG\tJJ/\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text),
                                  quality_cutoff=32)

    self.assertDictEqual({'test': 'ACN'}, result)
def testUnknownReferenceName(self):
    # The read names reference 'testY' but the header only declares 'testX';
    # no consensus is expected.
    sam_text = (
        "@SQ\tSN:testX\n"
        "test1\t99\ttestY\t1\t44\t12M\t=\t1\t3\tACA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({}, result)
def testSeedsConverged(self):
    # SAM columns: qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen,
    # seq, qual
    # One read per reference; with is_filtered=True only 'test' is expected
    # to survive, and the distance report should describe all three.
    # NOTE(review): the 10-base reads carry 12-character quality strings;
    # left exactly as in the original fixture.
    sam_text = (
        "@SQ\tSN:test\tSN:other\tSN:wayoff\n"
        "test1\t99\ttest\t1\t44\t10M\t=\t1\t10\tATGAGGAGTA\tJJJJJJJJJJJJ\n"
        "other1\t99\tother\t1\t44\t10M\t=\t1\t10\tATGACCAGTA\tJJJJJJJJJJJJ\n"
        "wayoff1\t99\twayoff\t1\t44\t10M\t=\t1\t10\tATGAGGGTAC\tJJJJJJJJJJJJ\n"
    )
    seeds = {'test': 'ATGAAGTA', 'other': 'AAGCCGAA', 'wayoff': 'TCATGTAC'}
    wanted_conseqs = {'test': 'ATGAGGAGTA'}
    wanted_distances = {
        'test': {'seed_dist': 2, 'other_dist': 5, 'other_seed': 'other'},
        'other': {'seed_dist': 4, 'other_dist': 2, 'other_seed': 'test'},
        'wayoff': {'seed_dist': 4, 'other_dist': 3, 'other_seed': 'test'},
    }
    # Filled in by sam_to_conseqs through the distance_report argument.
    reported = {}

    result = remap.sam_to_conseqs(
        StringIO(sam_text),
        seeds=seeds,
        original_seeds=seeds,  # deliberately the very same dict object
        is_filtered=True,
        distance_report=reported,
    )

    self.maxDiff = 1000
    self.assertEqual(wanted_conseqs, result)
    self.assertEqual(wanted_distances, reported)
def testInsertionAfterLowQuality(self):
    # CIGAR 3M3I3M where the base just before the insertion has quality '/'.
    # With quality_cutoff=32 that base is expected as N and the inserted GGG
    # is expected to be left out of the consensus.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I3M\t=\t1\t9\tACAGGGAGA\tJJ/JJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text),
                                  quality_cutoff=32)

    self.assertDictEqual({'test': 'ACNAGA'}, result)
def testMaxConsensus(self):
    # Two reads start with A and one starts with T; the expected consensus
    # takes the more common base.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACA\tJJJ\n"
        "test2\t147\ttest\t1\t44\t3M\t=\t1\t-3\tACA\tJJJ\n"
        "test3\t99\ttest\t1\t44\t3M\t=\t1\t3\tTCA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA'}, result)
def testLowQualityAtEnd(self):
    # The last base has quality '/', which with quality_cutoff=32 is
    # expected to be reported as N rather than trimmed.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACG\tJJ/\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text), quality_cutoff=32)

    self.assertDictEqual({'test': 'ACN'}, result)
def testOffset(self):
    # The only read starts at position 4, so the expected consensus is
    # padded with three leading N's.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t147\ttest\t4\t44\t12M\t=\t3\t-12\tACAAGACCCAAC\tJJJJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'NNNACAAGACCCAAC'}, result)
def testBigDeletionWithFrameShift(self):
    # A four-base deletion (3M4D3M) is expected to appear as four '-'
    # characters in the consensus.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M4D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA----GGG'}, result)
def testLowQualityForward(self):
    # The mates overlap at position 3: the forward read has A with quality
    # 'A', the reverse read has G with quality 'J'. The expected consensus
    # takes G there.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t3\t3\tATA\tJJA\n"
        "test1\t147\ttest\t3\t44\t3M\t=\t1\t-3\tGCC\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ATGCC'}, result)
def testInsertionAfterLowQuality(self):
    # CIGAR 3M3I3M where the base just before the insertion has quality '/'.
    # With quality_cutoff=32 that base is expected as N and the inserted GGG
    # is expected to be left out of the consensus.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I3M\t=\t1\t9\tACAGGGAGA\tJJ/JJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text), quality_cutoff=32)

    self.assertDictEqual({'test': 'ACNAGA'}, result)
def testSimpleInsertion(self):
    # CIGAR 3M3I3M: the three inserted bases (GGG) are expected to be kept,
    # so the consensus equals the full read.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I3M\t=\t1\t9\tACAGGGAGA\tJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAGGGAGA'}, result)
def testSoftClip(self):
    # CIGAR 3S5M1S: only the five matched bases in the middle of the read
    # are expected in the consensus.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3S5M1S\t=\t1\t9\tACAGGGAGA\tJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'GGGAG'}, result)
def testExtraFields(self):
    # Optional tag fields (AS, NM) after the quality column must not
    # disturb the result.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACA\tJJJ\tAS:i:236\tNM:i:12\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA'}, result)
def testOffset(self):
    # The only read starts at position 4, so the expected consensus is
    # padded with three leading N's.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t147\ttest\t4\t44\t12M\t=\t3\t-12\tACAAGACCCAAC\tJJJJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'NNNACAAGACCCAAC'}, result)
def testExtraFields(self):
    # Optional tag fields (AS, NM) after the quality column must not
    # disturb the result.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACA\tJJJ\tAS:i:236\tNM:i:12\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA'}, result)
def testTie(self):
    # One read votes G and another votes T for the first base; the expected
    # consensus starts with G.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tGCA\tJJJ\n"
        "test2\t147\ttest\t1\t44\t3M\t=\t1\t-3\tTCA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'GCA'}, result)
def testUnknownReferenceName(self):
    # The read names reference 'testY' but the header only declares 'testX';
    # no consensus is expected.
    sam_text = (
        "@SQ\tSN:testX\n"
        "test1\t99\ttestY\t1\t44\t12M\t=\t1\t3\tACA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({}, result)
def testHeaderFields(self):
    # The @SQ header carries an extra field (with spaces and a colon in its
    # value) ahead of SN; the reference name 'test' must still be picked up.
    sam_text = (
        "@SQ\tOF:other field: ignored\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA'}, result)
def testDeletion(self):
    # A single read with a three-base deletion (3M3D3M); the expected
    # consensus contains only the six read bases, with no gap characters.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAGGG'}, result)
def testLowQuality(self):
    # The middle base has quality '/', which with quality_cutoff=32 is
    # expected to be reported as N.
    # NOTE(review): the earlier comment here said the overlapped portion of
    # the reverse read is ignored even if it has higher quality, but this
    # fixture contains only a forward read - confirm whether a reverse mate
    # was meant to be part of the data.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACG\tJ/J\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text), quality_cutoff=32)

    self.assertDictEqual({'test': 'ANG'}, result)
def testInsertionAndOffset(self):
    # The first read has a three-base insertion (3M3I3M); a second read
    # starts at position 5 and extends the consensus past the first one.
    # NOTE(review): the 9-base first read carries a 12-character quality
    # string; left exactly as in the original fixture.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I3M\t=\t1\t9\tACAGGGAGA\tJJJJJJJJJJJJ\n"
        "test2\t99\ttest\t5\t44\t5M\t=\t1\t5\tGACCC\tJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAGGGAGACCC'}, result)
def testLowQualityForward(self):
    # The mates overlap at position 3: the forward read has A with quality
    # 'A', the reverse read has G with quality 'J'. The expected consensus
    # takes G there.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t3\t3\tATA\tJJA\n"
        "test1\t147\ttest\t3\t44\t3M\t=\t1\t-3\tGCC\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ATGCC'}, result)
def testTie(self):
    # One read votes G and another votes T for the first base; the expected
    # consensus starts with G.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tGCA\tJJJ\n"
        "test2\t147\ttest\t1\t44\t3M\t=\t1\t-3\tTCA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'GCA'}, result)
def testSeedsNeedSomeReads(self):
    # Two seeds are supplied but only 'test' has a read mapped to it; the
    # expected result contains 'test' alone, with the read's bases laid
    # over the seed at positions 4-6.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t4\t44\t3M\t=\t10\t3\tTAT\tJJJ\n"
    )
    seeds = {'test': 'ACATTTGGGCAC', 'other': 'TATGCACCC'}

    result = remap.sam_to_conseqs(StringIO(sam_text), seeds=seeds)

    self.assertDictEqual({'test': 'ACATATGGGCAC'}, result)
def testSimple(self):
    # SAM columns: qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen,
    # seq, qual
    # A single fully matched read: the consensus should equal the read.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t12M\t=\t1\t12\tACAAGACCCAAC\tJJJJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAAGACCCAAC'}, result)
def testInsertionAndOffset(self):
    # The first read has a three-base insertion (3M3I3M); a second read
    # starts at position 5 and extends the consensus past the first one.
    # NOTE(review): the 9-base first read carries a 12-character quality
    # string; left exactly as in the original fixture.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3I3M\t=\t1\t9\tACAGGGAGA\tJJJJJJJJJJJJ\n"
        "test2\t99\ttest\t5\t44\t5M\t=\t1\t5\tGACCC\tJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAGGGAGACCC'}, result)
def testComplexInsertion(self):
    # Insertions whose length is not a multiple of three are ignored: the
    # 1I and 2I in this CIGAR are expected to be dropped from the consensus.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M1I3M2I6M\t=\t1\t12\tACAGAGAGGCCCAAC\tJJJJJJJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAAGACCCAAC'}, result)
def testReverseLeftOfForward(self):
    # The forward read (flag 99) sits at position 2 while its reverse mate
    # (flag 147) sits at position 1; expect both bases in reference order.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t2\t44\t1M\t=\t1\t1\tC\tJ\n"
        "test1\t147\ttest\t1\t44\t1M\t=\t2\t-1\tA\tJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'AC'}, result)
def testAllLowQuality(self):
    # SAM columns: qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen,
    # seq, qual
    # The only base has quality '#'; with quality_cutoff=32 no consensus is
    # expected for the reference at all.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t147\ttest\t1\t24\t1M\t=\t1\t-1\tT\t#\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text), quality_cutoff=32)

    self.assertDictEqual({}, result)
def testPairMapsToTwoReferences(self):
    # The two mates of pair 'test1' are mapped to different references
    # (testX and testY); no consensus is expected for either.
    sam_text = (
        "@SQ\tSN:testX\n"
        "@SQ\tSN:testY\n"
        "test1\t99\ttestX\t1\t44\t3M\t=\t1\t3\tACG\tJJJ\n"
        "test1\t147\ttestY\t1\t44\t3M\t=\t1\t-3\tACG\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({}, result)
def testHeaders(self):
    # Header lines other than @SQ come before the reference declaration and
    # must not affect the result.
    sam_text = (
        "@SH\tsome header\n"
        "@MHI\tmost headers are ignored, except SQ for sequence reference\n"
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA'}, result)
def testDeletionInSomeReads(self):
    # Two reads have a three-base deletion and a third read covers the same
    # span without it; the expected consensus keeps the undeleted bases.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
        "test2\t99\ttest\t1\t44\t3M3D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
        "test3\t99\ttest\t1\t44\t9M\t=\t3\t9\tACATTTGGG\tJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACATTTGGG'}, result)
def testComplexInsertion(self):
    # Insertions whose length is not a multiple of three are ignored: the
    # 1I and 2I in this CIGAR are expected to be dropped from the consensus.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M1I3M2I6M\t=\t1\t12\tACAGAGAGGCCCAAC\tJJJJJJJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAAGACCCAAC'}, result)
def testSimple(self):
    # SAM columns: qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen,
    # seq, qual
    # A single fully matched read: the consensus should equal the read.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t12M\t=\t1\t12\tACAAGACCCAAC\tJJJJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACAAGACCCAAC'}, result)
def testSeedsWithLowQuality(self):
    # A seeded reference with one read whose last base has quality '/'.
    # With quality_cutoff=32 the expected consensus has no N: positions
    # without usable read data are expected to show the seed's bases.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t4\t44\t3M\t=\t10\t3\tTAT\tJJ/\n"
    )
    seeds = {'test': 'ACATTTGGGCAC'}

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text),
                                  seeds=seeds,
                                  quality_cutoff=32)

    self.assertDictEqual({'test': 'ACATATGGGCAC'}, result)
def testMaxConsensus(self):
    # Two reads start with A and one starts with T; the expected consensus
    # takes the more common base.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACA\tJJJ\n"
        "test2\t147\ttest\t1\t44\t3M\t=\t1\t-3\tACA\tJJJ\n"
        "test3\t99\ttest\t1\t44\t3M\t=\t1\t3\tTCA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA'}, result)
def testHeaders(self):
    # Header lines other than @SQ come before the reference declaration and
    # must not affect the result.
    sam_text = (
        "@SH\tsome header\n"
        "@MHI\tmost headers are ignored, except SQ for sequence reference\n"
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACA\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACA'}, result)
def testLowQuality(self):
    # The middle base has quality '/', which with quality_cutoff=32 is
    # expected to be reported as N.
    # NOTE(review): the earlier comment here said the overlapped portion of
    # the reverse read is ignored even if it has higher quality, but this
    # fixture contains only a forward read - confirm whether a reverse mate
    # was meant to be part of the data.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M\t=\t1\t3\tACG\tJ/J\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text), quality_cutoff=32)

    self.assertDictEqual({'test': 'ANG'}, result)
def testDeletionInSomeReads(self):
    # Two reads have a three-base deletion and a third read covers the same
    # span without it; the expected consensus keeps the undeleted bases.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
        "test2\t99\ttest\t1\t44\t3M3D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
        "test3\t99\ttest\t1\t44\t9M\t=\t3\t9\tACATTTGGG\tJJJJJJJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({'test': 'ACATTTGGG'}, result)
def testPairMapsToTwoReferences(self):
    # The two mates of pair 'test1' are mapped to different references
    # (testX and testY); no consensus is expected for either.
    sam_text = (
        "@SQ\tSN:testX\n"
        "@SQ\tSN:testY\n"
        "test1\t99\ttestX\t1\t44\t3M\t=\t1\t3\tACG\tJJJ\n"
        "test1\t147\ttestY\t1\t44\t3M\t=\t1\t-3\tACG\tJJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text))

    self.assertDictEqual({}, result)
def testLowQualityAndDeletion(self):
    # One read deletes three bases; a second read covers them, but with
    # quality '/'. With quality_cutoff=32 those positions are expected as N.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
        "test2\t99\ttest\t1\t44\t9M\t=\t3\t9\tACATTTGGG\tJJJ///JJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text),
                                  quality_cutoff=32)

    self.assertDictEqual({'test': 'ACANNNGGG'}, result)
def testNothingMapped(self):
    # The SAM input holds only a header line, so no reads are mapped to
    # either seed; filtering with filter_coverage=2 should yield nothing.
    sam_text = "@SQ\tSN:test\tSN:other\n"
    seeds = {'test': 'ATGAAGTACTCTCT', 'other': 'AAGCCGAAGTGTGT'}

    result = remap.sam_to_conseqs(
        StringIO.StringIO(sam_text),
        seeds=seeds,
        is_filtered=True,
        filter_coverage=2,
    )

    self.assertDictEqual({}, result)
def testAllLowQuality(self):
    # SAM columns: qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen,
    # seq, qual
    # The only base has quality '#'; with quality_cutoff=32 no consensus is
    # expected for the reference at all.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t147\ttest\t1\t24\t1M\t=\t1\t-1\tT\t#\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text), quality_cutoff=32)

    self.assertDictEqual({}, result)
def testLowQualityAndDeletion(self):
    # One read deletes three bases; a second read covers them, but with
    # quality '/'. With quality_cutoff=32 those positions are expected as N.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3D3M\t=\t3\t6\tACAGGG\tJJJJJJ\n"
        "test2\t99\ttest\t1\t44\t9M\t=\t3\t9\tACATTTGGG\tJJJ///JJJ\n"
    )

    result = remap.sam_to_conseqs(StringIO(sam_text), quality_cutoff=32)

    self.assertDictEqual({'test': 'ACANNNGGG'}, result)
def testSeedsNeedSomeReads(self):
    # Two seeds are supplied but only 'test' has a read mapped to it; the
    # expected result contains 'test' alone, with the read's bases laid
    # over the seed at positions 4-6.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t4\t44\t3M\t=\t10\t3\tTAT\tJJJ\n"
    )
    seeds = {'test': 'ACATTTGGGCAC', 'other': 'TATGCACCC'}

    result = remap.sam_to_conseqs(StringIO(sam_text), seeds=seeds)

    self.assertDictEqual({'test': 'ACATATGGGCAC'}, result)
def testSeeds(self):
    # Two short reads cover positions 4-6 and 10-12 of a 12-base seed; the
    # expected consensus uses the read bases there and the seed elsewhere.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t4\t44\t3M\t=\t10\t3\tTAT\tJJJ\n"
        "test2\t99\ttest\t10\t44\t3M\t=\t4\t-3\tCAC\tJJJ\n"
    )
    seeds = {'test': 'ACATTTGGGCAC'}

    result = remap.sam_to_conseqs(StringIO.StringIO(sam_text), seeds=seeds)

    self.assertDictEqual({'test': 'ACATATGGGCAC'}, result)
def testSeedsWithLowQuality(self):
    # A seeded reference with one read whose last base has quality '/'.
    # With quality_cutoff=32 the expected consensus has no N: positions
    # without usable read data are expected to show the seed's bases.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t4\t44\t3M\t=\t10\t3\tTAT\tJJ/\n"
    )
    seeds = {'test': 'ACATTTGGGCAC'}

    result = remap.sam_to_conseqs(StringIO(sam_text),
                                  seeds=seeds,
                                  quality_cutoff=32)

    self.assertDictEqual({'test': 'ACATATGGGCAC'}, result)
def testSeedsWithCodonDeletion(self):
    # The read deletes three bases (3M3D3M) relative to a seeded reference;
    # the expected consensus is identical to the seed, so the deleted codon
    # is expected to be filled from the seed.
    sam_text = (
        "@SQ\tSN:test\n"
        "test1\t99\ttest\t1\t44\t3M3D3M\t=\t10\t6\tACAGGG\tJJJJJJ\n"
    )
    seeds = {'test': 'ACATTTGGGCAC'}

    result = remap.sam_to_conseqs(StringIO(sam_text),
                                  seeds=seeds,
                                  quality_cutoff=32)

    self.assertDictEqual({'test': 'ACATTTGGGCAC'}, result)