class TestIndexedBam(_IndexedAlnFileReaderTests):
    """
    IndexedBamReader-specific tests, extending _IndexedAlnFileReaderTests.

    READER_CONSTRUCTOR and CONSTRUCTOR_ARGS are presumably consumed by the
    base test class to build the reader under test -- confirm against the
    base class definition.
    """

    READER_CONSTRUCTOR = IndexedBamReader
    CONSTRUCTOR_ARGS = (data.getBamAndCmpH5()[0], data.getLambdaFasta())

    def test_empty_bam(self):
        """The BAM returned by data.getEmptyBam() opens and has length 0."""
        fn = data.getEmptyBam()
        # Use the context-manager form so the reader is closed even when
        # the assertion fails.
        with IndexedBamReader(fn) as bam:
            EQ(len(bam), 0)

    def test_alignment_identity(self):
        """
        Check that the values of the 'identity' property are consistent
        between IndexedBamReader (numpy array) and BamAlignment (float)
        """
        fn = data.getBamAndCmpH5()[0]
        with IndexedBamReader(fn) as bam_in:
            i1 = bam_in.identity
            i2 = np.array([rec.identity for rec in bam_in])
            EQ((i2 == i1).all(), True)

    def test_alignment_identity_unindexed(self):
        """
        Check that the value of the 'identity' property is the same whether
        or not the .pbi index was used to calculate it.
        """
        import os

        fn1 = data.getBamAndCmpH5()[0]
        # Copy the BAM into a private temporary directory (which holds no
        # .pbi next to the copy) and remove the directory afterwards, so the
        # test leaves nothing behind on disk.
        tmp_dir = tempfile.mkdtemp()
        try:
            fn2 = os.path.join(tmp_dir, "unindexed.bam")
            shutil.copyfile(fn1, fn2)
            with IndexedBamReader(fn1) as bam_pbi:
                with BamReader(fn2) as bam_noindex:
                    i1 = np.array([rec.identity for rec in bam_pbi])
                    i2 = np.array([rec.identity for rec in bam_noindex])
                    EQ((i2 == i1).all(), True)
        finally:
            shutil.rmtree(tmp_dir)
def test_len_h5(self):
    """
    Verify len(), _length, totalLength and numRecords for HDF5-backed
    datasets, including that updateCounts() restores overwritten counts.
    """

    def check_counts(dset, n_records, n_bases):
        # Counts as reported right after loading.
        self.assertEqual(len(dset), n_records)
        self.assertEqual(dset._length, (n_records, n_bases))
        self.assertEqual(dset.totalLength, n_bases)
        self.assertEqual(dset.numRecords, n_records)
        # Overwrite both counts with a sentinel and confirm it sticks ...
        dset.totalLength = -1
        dset.numRecords = -1
        self.assertEqual(dset.totalLength, -1)
        self.assertEqual(dset.numRecords, -1)
        # ... then confirm updateCounts() brings the real values back.
        dset.updateCounts()
        self.assertEqual(dset.totalLength, n_bases)
        self.assertEqual(dset.numRecords, n_records)

    # HdfSubreadSet
    # len means something else in bax/bas land. These numbers may actually
    # be correct...
    sset = HdfSubreadSet(data.getXml(17), strict=True)
    check_counts(sset, 9, 128093)

    # AlignmentSet with cmp.h5
    aln = AlignmentSet(upstreamData.getBamAndCmpH5()[1], strict=True)
    check_counts(aln, 112, 59970)
class TestIndexedBam(_IndexedAlnFileReaderTests):
    """
    IndexedBamReader-specific tests, extending _IndexedAlnFileReaderTests.

    READER_CONSTRUCTOR and CONSTRUCTOR_ARGS are presumably consumed by the
    base test class to build the reader under test -- confirm against the
    base class definition.
    """

    READER_CONSTRUCTOR = IndexedBamReader
    CONSTRUCTOR_ARGS = (data.getBamAndCmpH5()[0], data.getLambdaFasta())

    def test_empty_bam(self):
        """The BAM returned by data.getEmptyBam() opens and has length 0."""
        fn = data.getEmptyBam()
        # Use the context-manager form so the reader is closed even when
        # the assertion fails, instead of leaving the handle open.
        with IndexedBamReader(fn) as bam:
            EQ(len(bam), 0)
class TestBasicBam(_BasicAlnFileReaderTests):
    """
    BamReader-specific tests, extending _BasicAlnFileReaderTests.

    self.f and self.fwdAln are not set here; presumably the base class
    provides them -- confirm against the base class definition.
    """

    READER_CONSTRUCTOR = BamReader
    CONSTRUCTOR_ARGS = (
        data.getBamAndCmpH5()[0],
        data.getLambdaFasta(),
    )

    def testSpecVersion(self):
        """The reader reports version "3.0.1" for the test BAM."""
        expected_version = "3.0.1"
        EQ(expected_version, self.f.version)

    def testReadScore(self):
        """The forward alignment's readScore is approximately 0.904."""
        # Third argument is presumably the number of decimal places to
        # compare -- confirm against the EQISH helper.
        expected_score = 0.904
        EQISH(expected_score, self.fwdAln.readScore, 3)
def test_alignment_identity(self):
    """
    The reader-level 'identity' array from IndexedBamReader must match the
    per-record 'identity' values of the alignments it yields.
    """
    bam_path = data.getBamAndCmpH5()[0]
    with IndexedBamReader(bam_path) as reader:
        # Reader-level values first, then the same quantity record by record.
        from_reader = reader.identity
        from_records = np.array([aln.identity for aln in reader])
        matches = from_records == from_reader
        EQ(matches.all(), True)
def test_alignment_identity(self):
    """
    Compare the 'identity' property exposed by IndexedBamReader with the
    'identity' of each alignment record obtained by iterating the reader.
    """
    with IndexedBamReader(data.getBamAndCmpH5()[0]) as indexed:
        reader_identity = indexed.identity
        # Collect the per-record values with an explicit loop.
        per_record = []
        for record in indexed:
            per_record.append(record.identity)
        record_identity = np.array(per_record)
        EQ((record_identity == reader_identity).all(), True)
def test_alignment_identity_unindexed(self):
    """
    Check that the value of the 'identity' property is the same whether
    or not the .pbi index was used to calculate it.
    """
    import os

    fn1 = data.getBamAndCmpH5()[0]
    # Copy the BAM into a private temporary directory (which holds no .pbi
    # next to the copy) and remove the directory afterwards, so the test
    # leaves nothing behind on disk.
    tmp_dir = tempfile.mkdtemp()
    try:
        fn2 = os.path.join(tmp_dir, "unindexed.bam")
        shutil.copyfile(fn1, fn2)
        with IndexedBamReader(fn1) as bam_pbi:
            with BamReader(fn2) as bam_noindex:
                i1 = np.array([rec.identity for rec in bam_pbi])
                i2 = np.array([rec.identity for rec in bam_noindex])
                EQ((i2 == i1).all(), True)
    finally:
        shutil.rmtree(tmp_dir)
def test_alignment_identity_unindexed(self):
    """
    Check that the value of the 'identity' property is the same whether
    or not the .pbi index was used to calculate it.
    """
    import os

    fn1 = data.getBamAndCmpH5()[0]
    # Work on a copy inside a throwaway directory; the directory holds only
    # the BAM (no .pbi beside it) and is deleted in the finally clause so
    # no temporary file outlives the test.
    scratch_dir = tempfile.mkdtemp()
    try:
        fn2 = os.path.join(scratch_dir, "copy.bam")
        shutil.copyfile(fn1, fn2)
        with IndexedBamReader(fn1) as bam_pbi, BamReader(fn2) as bam_noindex:
            i1 = np.array([rec.identity for rec in bam_pbi])
            i2 = np.array([rec.identity for rec in bam_noindex])
            EQ((i2 == i1).all(), True)
    finally:
        shutil.rmtree(scratch_dir)
class TestCmpH5(_IndexedAlnFileReaderTests):
    """CmpH5Reader-specific tests, extending _IndexedAlnFileReaderTests."""

    READER_CONSTRUCTOR = CmpH5Reader
    CONSTRUCTOR_ARGS = (data.getBamAndCmpH5()[1],)

    #
    # Only behaviors unique to CmpH5Reader belong below; there should be
    # few of them.
    #

    def testLazyChemistryResolution(self):
        """
        A cmp.h5 file lacking chemistry information must still open;
        ChemistryLookupError is raised only once the chemistry is actually
        requested. This behavior is retained for compatibility.
        """
        # Constructing the reader must not raise.
        reader = CmpH5Reader(data.getCmpH5())

        # Reader-level access raises ...
        with assert_raises(ChemistryLookupError):
            reader.sequencingChemistry

        # ... and so does access through an individual alignment.
        with assert_raises(ChemistryLookupError):
            reader[0].sequencingChemistry
def __init__(self):
    """
    Open the paired BAM / cmp.h5 test files and cache the first two
    alignments of each, plus clipped variants of them.
    """
    bam_path, cmp_path = D.getBamAndCmpH5()

    self.b = PacBioBamReader(bam_path, D.getLambdaFasta())
    self.c = CmpH5Reader(cmp_path)
    self.bBasic = BamReader(bam_path)

    # The two formats do not sort identically in general: on a tStart tie
    # BAM places + alignments ahead of - alignments, whereas cmp.h5 breaks
    # the tie on tEnd. This file has no tStart ties, so the orders agree.
    self.bAlns = list(self.b)
    self.bFwd, self.bRev = self.bAlns[0], self.bAlns[1]

    self.cAlns = list(self.c)
    self.cFwd, self.cRev = self.cAlns[0], self.cAlns[1]

    # Clipped views over the same windows for both formats.
    fwd_window = (10, 60)
    rev_window = (310, 360)
    self.cFwdClipped = self.cFwd.clippedTo(*fwd_window)
    self.bFwdClipped = self.bFwd.clippedTo(*fwd_window)
    self.cRevClipped = self.cRev.clippedTo(*rev_window)
    self.bRevClipped = self.bRev.clippedTo(*rev_window)
def testIncorrectReference(self):
    """
    Constructing a BamReader with the FASTA from D.getTinyFasta() instead
    of the matching reference must raise.
    """
    bam_path, _ = D.getBamAndCmpH5()
    wrong_fasta = D.getTinyFasta()
    with assert_raises(Exception):
        # Only the constructor call matters; the result is never used.
        BamReader(bam_path, wrong_fasta)
def test_alignmentset_index(self):
    """
    An AlignmentSet built on the cmp.h5 test file supports range queries
    and exposes an index with one entry per record.
    """
    cmp_path = upstreamData.getBamAndCmpH5()[1]
    aln = AlignmentSet(cmp_path, strict=True)

    # Range query on the first reference.
    first_ref = aln.refNames[0]
    in_range = list(aln.readsInRange(first_ref, 0, 1000))
    self.assertEqual(len(in_range), 2)

    # Full iteration and the index must agree on the record count.
    expected_total = 112
    self.assertEqual(len(list(aln)), expected_total)
    self.assertEqual(len(aln.index), expected_total)
def test_len(self):
    """
    Verify len(), _length, totalLength and numRecords for each dataset
    type, including that updateCounts() restores overwritten counts.
    """

    def check_counts(dset, n_records, n_bases, has_length_pair=True):
        # Counts as reported right after loading.
        self.assertEqual(len(dset), n_records)
        if has_length_pair:
            self.assertEqual(dset._length, (n_records, n_bases))
        self.assertEqual(dset.totalLength, n_bases)
        self.assertEqual(dset.numRecords, n_records)
        # Overwrite both counts with a sentinel and confirm it sticks ...
        dset.totalLength = -1
        dset.numRecords = -1
        self.assertEqual(dset.totalLength, -1)
        self.assertEqual(dset.numRecords, -1)
        # ... then confirm updateCounts() brings the real values back.
        dset.updateCounts()
        self.assertEqual(dset.totalLength, n_bases)
        self.assertEqual(dset.numRecords, n_records)

    def check_iteration(dset, n_records, n_bases):
        # Cross-check the counts against an actual pass over the records.
        self.assertEqual(sum(1 for _ in dset), n_records)
        self.assertEqual(sum(len(rec) for rec in dset), n_bases)

    # AlignmentSet
    aln = AlignmentSet(data.getXml(8), strict=True)
    check_counts(aln, 92, 123588)
    check_iteration(aln, 92, 123588)

    # AlignmentSet with filters
    aln = AlignmentSet(data.getXml(15), strict=True)
    check_counts(aln, 40, 52023)

    # AlignmentSet with cmp.h5
    aln = AlignmentSet(upstreamData.getBamAndCmpH5()[1], strict=True)
    check_counts(aln, 112, 59970)

    # SubreadSet
    sset = SubreadSet(data.getXml(10), strict=True)
    check_counts(sset, 92, 124093)
    check_iteration(sset, 92, 124093)

    # HdfSubreadSet
    # len means something else in bax/bas land. These numbers may actually
    # be correct...
    sset = HdfSubreadSet(data.getXml(17), strict=True)
    check_counts(sset, 9, 128093)

    # ReferenceSet (the _length pair is not checked for this type)
    sset = ReferenceSet(data.getXml(9), strict=True)
    check_counts(sset, 59, 85774, has_length_pair=False)
def test_cmp_alignmentset_filters(self):
    """
    Adding a length >= 1000 requirement to the cmp.h5-backed AlignmentSet
    reduces its reported length from 112 to 12.
    """
    cmp_path = upstreamdata.getBamAndCmpH5()[1]
    aln = AlignmentSet(cmp_path, strict=True)
    self.assertEqual(len(aln), 112)

    length_requirement = [('>=', 1000)]
    aln.filters.addRequirement(length=length_requirement)
    self.assertEqual(len(aln), 12)
def test_movie_filter(self):
    """
    Combine two inputs into one dataset, check the read-group bookkeeping,
    then filter on a single movie name and compare the resulting length
    with that of the matching input opened on its own. Repeated for
    unaligned BAM, aligned BAM and cmp.h5 inputs.
    """

    def group_ids(dset):
        # The 'ID' column of the dataset's read group table.
        return dset.readGroupTable['ID']

    # Movie name used as the filter value in both BAM sections.
    bam_movie = ('m140913_005018_42139_c100713652400000001823152404301534'
                 '_s1_p0')

    # unaligned bam
    subreads_a = ("/pbi/dept/secondary/siv/testdata/"
                  "SA3-DS/ecoli/2590956/0003/"
                  "Analysis_Results/m140913_222218_42240_c10069"
                  "9952400000001823139203261564_s1_p0.all.subreadset.xml")
    subreads_b = ("/pbi/dept/secondary/siv/testdata/"
                  "SA3-DS/ecoli/2590953/0001/"
                  "Analysis_Results/m140913_005018_42139_c10071"
                  "3652400000001823152404301534_s1_p0.all.subreadset.xml")
    aln = SubreadSet(subreads_a, subreads_b)
    self.assertEqual(len(set(group_ids(aln))), len(group_ids(aln)))
    self.assertEqual(len(set(group_ids(aln))), 2)
    self.assertEqual(len(set(group_ids(aln))), len(set(aln.index.qId)))
    self.assertEqual(len(aln), 178570)
    aln.filters.addRequirement(movie=[('=', bam_movie)])
    self.assertEqual(len(SubreadSet(subreads_b)), len(aln))

    # aligned bam
    # Disabled alternative for the first input, kept for reference:
    #   "/pbi/dept/secondary/siv/testdata/"
    #   "SA3-DS/ecoli/2590956/0003/Alignment_Results/"
    #   "m140913_222218_42240_c1006999524000000018231"
    #   "39203261564_s1_p0.all.alignmentset.xml"
    aligned_a = upstreamdata.getBamAndCmpH5()[0]
    aligned_b = ("/pbi/dept/secondary/siv/testdata/"
                 "SA3-DS/ecoli/2590953/0001/Alignment_Results/"
                 "m140913_005018_42139_c1007136524000000018231"
                 "52404301534_s1_p0.all.alignmentset.xml")
    aln = AlignmentSet(aligned_a, aligned_b)
    self.assertEqual(len(set(group_ids(aln))), len(group_ids(aln)))
    self.assertEqual(len(set(group_ids(aln))), 2)
    self.assertEqual(len(set(group_ids(aln))), len(set(aln.index.qId)))
    self.assertEqual(len(aln), 103144)
    aln.filters.addRequirement(movie=[('=', bam_movie)])
    self.assertEqual(len(AlignmentSet(aligned_b)), len(aln))

    # cmpH5
    cmp1 = upstreamdata.getBamAndCmpH5()[1]
    cmp2 = ("/pbi/dept/secondary/siv/testdata/"
            "genomic_consensus-unittest/bam_c4p6_tests/"
            "ecoli_c4p6.cmp.h5")
    aln = AlignmentSet(cmp1, cmp2)
    self.assertEqual(len(set(group_ids(aln))), len(group_ids(aln)))
    self.assertEqual(len(set(group_ids(aln))),
                     len(set(aln.index.MovieID)))
    self.assertEqual(len(set(group_ids(aln))), 2)
    unfiltered_len = 57147
    self.assertEqual(len(aln), unfiltered_len)
    aln.filters.addRequirement(movie=[(
        '=',
        'm140905_042212_sidney_c100564852550000001823085912221377_s1_X0')])
    len1 = len(AlignmentSet(cmp1))
    self.assertEqual(len1, len(aln))
    # Dropping the movie requirement restores the unfiltered length.
    aln.filters.removeRequirement('movie')
    self.assertEqual(len(aln), unfiltered_len)
class TestIndexedBam(_IndexedAlnFileReaderTests):
    """
    Binds the inherited _IndexedAlnFileReaderTests to IndexedBamReader.

    Both attributes are presumably consumed by the base test class to
    build the reader under test -- confirm against the base class.
    """

    # Reader class under test.
    READER_CONSTRUCTOR = IndexedBamReader
    # Positional arguments: the test BAM path and the lambda FASTA.
    CONSTRUCTOR_ARGS = (
        data.getBamAndCmpH5()[0],
        data.getLambdaFasta(),
    )
class TestBasicBam(_BasicAlnFileReaderTests):
    """
    BamReader-specific tests, extending _BasicAlnFileReaderTests.

    self.f is not set here; presumably the base class provides it --
    confirm against the base class definition.
    """

    READER_CONSTRUCTOR = BamReader
    CONSTRUCTOR_ARGS = (
        data.getBamAndCmpH5()[0],
        data.getLambdaFasta(),
    )

    def testSpecVersion(self):
        """The reader reports version "3.0b7" for the test BAM."""
        expected_version = "3.0b7"
        EQ(expected_version, self.f.version)
def test_cmp_alignmentset_filters(self):
    """
    The cmp.h5-backed AlignmentSet reports 112 records unfiltered and 12
    once a length >= 1000 requirement is added.
    """
    aln = AlignmentSet(
        upstreamData.getBamAndCmpH5()[1],
        strict=True,
    )
    unfiltered_count = len(aln)
    self.assertEqual(unfiltered_count, 112)

    aln.filters.addRequirement(length=[('>=', 1000)])
    filtered_count = len(aln)
    self.assertEqual(filtered_count, 12)
def test_alignment_identity(self):
    """
    The 'identity' values read from the IndexedBamReader itself must equal
    the 'identity' of every record it iterates over.
    """
    source_bam = data.getBamAndCmpH5()[0]
    with IndexedBamReader(source_bam) as bam:
        # Read the reader-level property before iterating the records.
        bulk_identity = bam.identity
        record_identity = np.array([r.identity for r in bam])
        all_equal = (record_identity == bulk_identity).all()
        EQ(all_equal, True)