class _BasicAlnFileReaderTests(object):
    """
    Abstract base class for tests of the basic reader
    functionality---functionality not requiring the bam.pbi index.

    Concrete subclasses supply READER_CONSTRUCTOR (a callable that
    builds the reader under test) and CONSTRUCTOR_ARGS (the positional
    arguments passed to it).

    The tests are pretty tailored to the BAM/cmp.h5 files in
    pbcore.data.
    """
    READER_CONSTRUCTOR = None
    CONSTRUCTOR_ARGS = None
    BAX_FILE = data.getBaxForBam()

    def __init__(self):
        # Open the reader once and materialize every alignment; the
        # tests below index into this list.
        self.f = self.READER_CONSTRUCTOR(*self.CONSTRUCTOR_ARGS)
        self.alns = list(self.f)
        # Alignments 1 and 105 serve as the forward- and reverse-strand
        # fixtures (see testStrandOrientation).
        self.fwdAln = self.alns[1]
        self.revAln = self.alns[105]

    def testBasicOperations(self):
        EQ(False, self.f.isEmpty)
        EQ(True, self.f.isSorted)
        EQ(112, len(self.f))

    def testStrandOrientation(self):
        EQ(True, self.fwdAln.isForwardStrand)
        EQ(False, self.fwdAln.isReverseStrand)
        EQ(False, self.revAln.isForwardStrand)
        EQ(True, self.revAln.isReverseStrand)

    def testReadName(self):
        EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/1_344",
           self.fwdAln.readName)
        EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/51534/1_200",
           self.revAln.readName)

    def testAlignedRead(self):
        # Default arguments must behave like aligned=True in native
        # orientation; "genomic" orientation only differs (reverse
        # complement) for the reverse-strand alignment.
        expectedFwdNative = "GCCGCGAT-GATGAAAACTGATACCGGGGTTGCTGAGTGAATATATCGAACAGTCAGGTT-ACAGGCTGCGGCATTTTGTCCGCGCC-GGCTTCGCTCACTGTTCAGGCCGGAG-CACAGACCGCCGTTGAAATGGGCGGATGCTAATTACTATCTCCCGAAAGAAT-CGC-TACCAGGAAGGGCGATGGGAAACACTGCCCTTTCAGCGGG-CATCATGAATGCGATGGGCAGCGACTACATCCGTGAGGT-AATGTGGTGAAGTCTG-CCGTGTCGGTTATTCCAAAATGCTGCTGGGTG-TTATGCCT-CGTTTATAGAGCATAAGCAGCGCAACA-CCTTATCTGGTTGCC"
        EQ(expectedFwdNative, self.fwdAln.read(aligned=True))
        EQ(expectedFwdNative, self.fwdAln.read())
        EQ(expectedFwdNative, self.fwdAln.read(orientation="genomic"))
        expectedRevNative = "TAGCCACCGGATATCACCACAGGTGAGGCCGTGTAAGTTG-AGGTTTTTCTACGTCAGATTCTTTTGGGATTGGGCTTGGGTTTATTTCCTGGTGCGTTTCGTT-GAAGGTATTTGCAGTTTTCGCAGATTATGCCTCCGGTGATACTTCGTCGCTGTCTCGCCACACGTCCTCCTTTTCCTGCGGTAGTGGTAACACCCC"
        EQ(expectedRevNative, self.revAln.read(aligned=True))
        EQ(expectedRevNative, self.revAln.read())
        EQ(RC(expectedRevNative), self.revAln.read(orientation="genomic"))

    def testUnalignedRead(self):
        expectedFwdNative = 'GCCGCGATGATGAAAACTGATACCGGGGTTGCTGAGTGAATATATCGAACAGTCAGGTTACAGGCTGCGGCATTTTGTCCGCGCCGGCTTCGCTCACTGTTCAGGCCGGAGCACAGACCGCCGTTGAAATGGGCGGATGCTAATTACTATCTCCCGAAAGAATCGCTACCAGGAAGGGCGATGGGAAACACTGCCCTTTCAGCGGGCATCATGAATGCGATGGGCAGCGACTACATCCGTGAGGTAATGTGGTGAAGTCTGCCGTGTCGGTTATTCCAAAATGCTGCTGGGTGTTATGCCTCGTTTATAGAGCATAAGCAGCGCAACACCTTATCTGGTTGCC'
        EQ(expectedFwdNative, self.fwdAln.read(aligned=False))
        EQ(expectedFwdNative, self.fwdAln.read(aligned=False, orientation="genomic"))
        expectedRevNative = "TAGCCACCGGATATCACCACAGGTGAGGCCGTGTAAGTTGAGGTTTTTCTACGTCAGATTCTTTTGGGATTGGGCTTGGGTTTATTTCCTGGTGCGTTTCGTTGAAGGTATTTGCAGTTTTCGCAGATTATGCCTCCGGTGATACTTCGTCGCTGTCTCGCCACACGTCCTCCTTTTCCTGCGGTAGTGGTAACACCCC"
        EQ(expectedRevNative, self.revAln.read(aligned=False))
        EQ(RC(expectedRevNative), self.revAln.read(aligned=False, orientation="genomic"))

    def testAlignedReference(self):
        expectedFwdNative = 'GCCGCGCTGGATG--AACTGATACCGGGGTTGCTGAGTGAATATATCGAACAGTCAGGTTAACAGGCTGCGGCATTTTGTCCGCGCCGGGCTTCGCTCACTGTTCAGGCCGGAGCCACAGACCGCCGTTG-AATGGGCGGATGCTAATTACTATCTCCCGAAAGAATCCGCATACCAGGAAGGGCGCTGGGAAACACTGCCCTTTCAGCGGGCCATCATGAATGCGATGGGCAGCGACTACATCCGTGAGGTGAATGTGGTGAAGTCTGCCCGTGTCGGTTATTCCAAAATGCTGCTGGGTGTTTATGCCTAC-TTTATAGAGCATAAGCAGCGCAACACCCTTATCTGGTTGCC'
        EQ(expectedFwdNative, self.fwdAln.reference(aligned=True))
        EQ(expectedFwdNative, self.fwdAln.reference())
        EQ(expectedFwdNative, self.fwdAln.reference(orientation="genomic"))
        expectedRevNative = 'TAGCCACCGGATATC-CCACAGGTGA-GCCGTGT-AGTTGAAGG-TTTT-TACGTCAGATTCTTTTGGGATT-GGCTTGGGTTTATTT-CTGGTGCGTTTCGTTGGAAGGTATTTGCAGTTTTCGCAGATTATG--T-CGGTGATACTTCGTCGCTGTCTCGCCACACGTCCTCCTTTTCCTGCGGTAGTGGTAACACCCC'
        EQ(expectedRevNative, self.revAln.reference(aligned=True))
        EQ(expectedRevNative, self.revAln.reference())
        EQ(RC(expectedRevNative), self.revAln.reference(orientation="genomic"))

    def testUnalignedReference(self):
        expectedFwdNative = "GCCGCGCTGGATGAACTGATACCGGGGTTGCTGAGTGAATATATCGAACAGTCAGGTTAACAGGCTGCGGCATTTTGTCCGCGCCGGGCTTCGCTCACTGTTCAGGCCGGAGCCACAGACCGCCGTTGAATGGGCGGATGCTAATTACTATCTCCCGAAAGAATCCGCATACCAGGAAGGGCGCTGGGAAACACTGCCCTTTCAGCGGGCCATCATGAATGCGATGGGCAGCGACTACATCCGTGAGGTGAATGTGGTGAAGTCTGCCCGTGTCGGTTATTCCAAAATGCTGCTGGGTGTTTATGCCTACTTTATAGAGCATAAGCAGCGCAACACCCTTATCTGGTTGCC"
        EQ(expectedFwdNative, self.fwdAln.reference(aligned=False))
        EQ(expectedFwdNative, self.fwdAln.reference(aligned=False, orientation="genomic"))
        expectedRevNative = "TAGCCACCGGATATCCCACAGGTGAGCCGTGTAGTTGAAGGTTTTTACGTCAGATTCTTTTGGGATTGGCTTGGGTTTATTTCTGGTGCGTTTCGTTGGAAGGTATTTGCAGTTTTCGCAGATTATGTCGGTGATACTTCGTCGCTGTCTCGCCACACGTCCTCCTTTTCCTGCGGTAGTGGTAACACCCC"
        EQ(expectedRevNative, self.revAln.reference(aligned=False))
        EQ(RC(expectedRevNative), self.revAln.reference(aligned=False, orientation="genomic"))

    def testDeletionQV(self):
        # Expected values are laid out so that each 255 ends a group;
        # the 255 entries sit at the '-' columns of the aligned read
        # strings used in testAlignedRead.
        expectedFwdNative = (
            [ 17, 17, 17, 17, 17, 17, 17, 17, 255,
              4, 17, 17, 17, 8,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17,
              10, 8, 17, 17, 17, 17, 17, 17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 6,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 11,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 255,
              17, 17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 11,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 7,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 255,
              8,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17 ] )
        AEQ(expectedFwdNative, self.fwdAln.DeletionQV(aligned=True))
        AEQ(expectedFwdNative, self.fwdAln.DeletionQV())
        AEQ(expectedFwdNative, self.fwdAln.DeletionQV(orientation="genomic"))
        expectedRevNative = (
            [ 17, 17, 17, 17, 6,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 6,
              17, 17, 17, 17, 17, 17, 6,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 255,
              10, 17, 17, 17, 17, 17, 8, 9,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 10,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 8,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 7, 17, 255,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 9, 17, 17, 5,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 7,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
              17, 17, 17, 17, 17, 17, 17, 17, 17 ] )
        # (A leftover debugging `print` of the reverse-strand QVs used
        # to live here; it only produced noise in the test output.)
        AEQ(expectedRevNative, self.revAln.DeletionQV(aligned=True))
        AEQ(expectedRevNative, self.revAln.DeletionQV())
        # Genomic orientation of a reverse-strand alignment is expected
        # to reverse the per-base values.
        AEQ(expectedRevNative[::-1], self.revAln.DeletionQV(orientation="genomic"))

    # Disabled tests, retained from the original file:
    #
    # def testInsertionQV(self):
    #     pass
    #
    # def testSubstitutionQV(self):
    #     pass
    #
    # def testIPD(self):
    #     pass
    #
    # def testDeletionTag(self):
    #     expectedFwdNative = [78, 78, 84, 78, 78, 67, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
    #                          78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 65, 78,
    #                          78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78]
    #     AEQ(expectedFwdNative, self.fwdAln.DeletionTag(aligned=True))
    #     AEQ(expectedFwdNative, self.fwdAln.DeletionTag())
    #     AEQ(expectedFwdNative, self.fwdAln.DeletionTag(orientation="genomic"))
    #     expectedRevNative = [78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
    #                          78, 78, 78, 78, 45, 67, 78, 78, 78, 78, 78, 45, 84, 78, 78, 78, 78,
    #                          78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 45, 78, 78, 78, 78,
    #                          78, 78, 78, 78, 78, 78, 78, 78, 78, 78]
    #     AEQ(expectedRevNative, self.revAln.DeletionTag(aligned=True))
    #     AEQ(expectedRevNative, self.revAln.DeletionTag())
    #     # TODO: what is the correct behavior here?
    #     #AEQ(expectedRevNative[::-1], self.revAln.DeletionTag(orientation="genomic"))

    def testTranscript(self):
        EQ('MMMMMMRMDMMMMIIMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMIMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMDMMMMMMMMMMMMMMRMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMDMIMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMM',
           self.fwdAln.transcript())
        EQ("MMMMMMMMMMMMMMMIMMMMMMMMMMIMMMMMMMIMMMMMDMMMIMMMMIMMMMMMMMMMMMMMMMMMMMMMIMMMMMMMMMMMMMMMIMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMMMMIIMIMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM",
           self.revAln.transcript())

    def testClippedAlignments(self):
        # zip() is wrapped in list() throughout so the slicing and the
        # list comparisons do not depend on zip() returning a list.

        # Get a more interesting (more gappy) fwd strand aln
        a = self.fwdAln
        EQ([(980, 'C', 'C'),
            (981, 'C', 'C'),
            (982, 'T', 'T'),
            (983, 'A', '-'),
            (984, 'C', 'C'),
            (985, '-', 'G'),
            (985, 'T', 'T'),
            (986, 'T', 'T') ],
           list(zip(a.referencePositions(), a.reference(), a.read()))[308:316])

        ac1 = a.clippedTo(983, 985)
        EQ(983, ac1.referenceStart)
        EQ(985, ac1.referenceEnd)
        EQ([(983, 'A', '-'),
            (984, 'C', 'C')],
           list(zip(ac1.referencePositions(), ac1.reference(), ac1.read())))

        ac2 = a.clippedTo(982, 986)
        EQ(982, ac2.referenceStart)
        EQ(986, ac2.referenceEnd)
        EQ([(982, 'T', 'T'),
            (983, 'A', '-'),
            (984, 'C', 'C'),
            (985, '-', 'G'),
            (985, 'T', 'T')],
           list(zip(ac2.referencePositions(), ac2.reference(), ac2.read())))

        ac3 = a.clippedTo(984, 985)
        EQ(984, ac3.referenceStart)
        EQ(985, ac3.referenceEnd)
        EQ([(984, 'C', 'C')],
           list(zip(ac3.referencePositions(), ac3.reference(), ac3.read())))

        # Disabled checks, retained from the original file:
        #
        # # Get a more interesting (more gappy) rev strand aln
        # b = self.alns[3]
        # EQ([(2216, 'G', 'G'),
        #     (2215, 'G', 'G'),
        #     (2214, '-', 'C'),
        #     (2214, 'C', 'C'),
        #     (2213, 'A', 'A'),
        #     (2212, 'T', 'T'),
        #     (2211, 'G', 'G'),
        #     (2210, 'C', 'C'),
        #     (2209, 'T', 'T'),
        #     (2208, 'G', '-'),
        #     (2207, 'G', 'G'),
        #     (2206, 'C', 'C')],
        #    zip(b.referencePositions(), b.reference(), b.read())[188:200])
        # bc1 = b.clippedTo(2208, 2214)
        # EQ([(2213, 'A', 'A'),
        #     (2212, 'T', 'T'),
        #     (2211, 'G', 'G'),
        #     (2210, 'C', 'C'),
        #     (2209, 'T', 'T'),
        #     (2208, 'G', '-')],
        #    zip(bc1.referencePositions(), bc1.reference(), bc1.read()))
        # bc2 = b.clippedTo(2207, 2215)
        # EQ([(2214, 'C', 'C'),
        #     (2213, 'A', 'A'),
        #     (2212, 'T', 'T'),
        #     (2211, 'G', 'G'),
        #     (2210, 'C', 'C'),
        #     (2209, 'T', 'T'),
        #     (2208, 'G', '-'),
        #     (2207, 'G', 'G')],
        #    zip(bc2.referencePositions(), bc2.reference(), bc2.read()))
        # bc3 = b.clippedTo(2209, 2214)
        # EQ([(2213, 'A', 'A'),
        #     (2212, 'T', 'T'),
        #     (2211, 'G', 'G'),
        #     (2210, 'C', 'C'),
        #     (2209, 'T', 'T')],
        #    zip(bc3.referencePositions(), bc3.reference(), bc3.read()))
        # # Test clipping in a large deletion
        # d = self.alns[52]
        # EQ([(16191, 'C', 'C'),
        #     (16192, 'A', 'A'),
        #     (16193, 'G', 'G'),
        #     (16194, 'C', 'C'),
        #     (16195, 'A', 'A'),
        #     (16196, 'G', '-'),
        #     (16197, 'G', '-'),
        #     (16198, 'T', '-'),
        #     (16199, 'G', 'G'),
        #     (16200, 'A', 'A'),
        #     (16201, 'G', 'G')],
        #    zip(d.referencePositions(), d.reference(), d.read())[129:140])
        # dc1 = d.clippedTo(16196, 16198)
        # # where's the test code?

    def testBaxAttaching(self):
        # Before attaching, should get sane exceptions
        with assert_raises(ValueError):
            self.fwdAln.zmw
        with assert_raises(ValueError):
            self.fwdAln.zmwRead

        # Now attach
        self.f.attach(self.BAX_FILE)
        EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/1_344",
           self.fwdAln.readName)
        EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328",
           self.fwdAln.zmwName)
        EQ("<Zmw: m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328>",
           repr(self.fwdAln.zmw))
        EQ("<ZmwRead: m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/1_344>",
           repr(self.fwdAln.zmwRead))

        # Check read contents, for every aln.
        for aln in self.alns:
            EQ(aln.read(aligned=False, orientation="native"),
               aln.zmwRead.basecalls())

    def testClippingsVsBaxData(self):
        # For a sliding family of short reference windows, the clipped
        # alignment's read must agree with the basecalls of its zmwRead.
        self.f.attach(self.BAX_FILE)
        for aln in [self.fwdAln, self.revAln]:
            for cS in range(aln.tStart, aln.tEnd + 1):
                for cE in range(cS + 1, min(aln.tEnd, cS + 10)):
                    ca = aln.clippedTo(cS, cE)
                    EQ(ca.zmwRead.basecalls(),
                       ca.read(aligned=False, orientation="native"))

    def testReadsInRange(self):
        # readsInRange must return exactly the alignments that a brute
        # force overlap scan finds, for each 1000-base window.
        wLen = 1000
        for wStart in range(0, 50000, wLen):
            wEnd = wStart + wLen
            expectedNames = set([ a.readName for a in self.alns
                                  if (a.referenceName == "lambda_NEB3011" and
                                      a.overlapsReferenceRange(wStart, wEnd)) ])
            EQ(expectedNames,
               set([ a.readName for a in self.f.readsInRange("lambda_NEB3011", wStart, wEnd) ]))

    def testReadGroupTable(self):
        rgFwd = self.fwdAln.readGroupInfo
        EQ([('ID', '<i4'), ('MovieName', 'O'), ('ReadType', 'O'), ('SequencingChemistry', 'O'), ('FrameRate', '<f8')], rgFwd.dtype)
        EQ("P6-C4", rgFwd.SequencingChemistry)
        EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0", rgFwd.MovieName)
        #EQ("bar", rgFwd.ReadType)

    def testSequencingChemistry(self):
        EQ(["P6-C4"], self.f.sequencingChemistry)
        EQ("P6-C4", self.fwdAln.sequencingChemistry)
        EQ("P6-C4", self.revAln.sequencingChemistry)
def __init__(self):
    """
    Open the unaligned BAM and the bax.h5 returned by
    data.getBaxForBam(), and cache the first read from each.
    """
    self.bam = bam_reader = BamReader(data.getUnalignedBam())
    self.bax = bax_reader = BaxH5Reader(data.getBaxForBam())
    # First subread from the bax.h5 and first record from the BAM.
    self.baxRead0 = next(bax_reader.subreads())
    self.bamRead0 = next(iter(bam_reader))
def __init__(self):
    """
    Build the BAM and bax.h5 readers used by the tests and keep the
    leading read of each one as a fixture.
    """
    unaligned_bam = BamReader(data.getUnalignedBam())
    self.bam = unaligned_bam
    bax_h5 = BaxH5Reader(data.getBaxForBam())
    self.bax = bax_h5

    # Leading subread of the bax.h5 file, then leading BAM record.
    subread_iter = bax_h5.subreads()
    self.baxRead0 = next(subread_iter)
    record_iter = iter(unaligned_bam)
    self.bamRead0 = next(record_iter)
def __init__(self):
    """
    Open a ZmwReadStitcher over the unaligned BAM and a BasH5Reader
    over the matching bax file, and look up entry 1650 in both.
    """
    # Same key is used against both readers.
    # NOTE(review): 1650 is presumably a ZMW hole number -- confirm.
    key = 1650
    self.V = stitcher = ZmwReadStitcher(getUnalignedBam())
    self.B = bas_reader = BasH5Reader(getBaxForBam())
    self.VZ = stitcher[key]
    self.BZ = bas_reader[key]
def setup_class(self):
    """
    Fixture setup: open the unaligned BAM and the bax.h5 returned by
    data.getBaxForBam(), and cache the first read from each.
    """
    # NOTE(review): xunit-style runners normally call setup_class with
    # the class object, so `self` is presumably the class here --
    # confirm against the test runner in use.
    bam_reader = BamReader(data.getUnalignedBam())
    self.bam = bam_reader
    bax_reader = BaxH5Reader(data.getBaxForBam())
    self.bax = bax_reader
    self.baxRead0 = next(bax_reader.subreads())
    self.bamRead0 = next(iter(bam_reader))