def test_len_h5(self):
    """Check the record/length counts of the HDF5-backed dataset types.

    For every dataset the reported counts are compared against known
    values, overwritten with -1, and then expected to come back to the
    known values once updateCounts() has been called.
    """
    # Datasets are built lazily, one at a time, right before their checks.
    cases = (
        # HdfSubreadSet
        # len means something else in bax/bas land. These numbers may
        # actually be correct...
        (lambda: HdfSubreadSet(data.getXml(17), strict=True),
         9, 128093),
        # AlignmentSet with cmp.h5
        (lambda: AlignmentSet(upstreamData.getBamAndCmpH5()[1], strict=True),
         112, 59970),
    )
    for build, num_records, total_length in cases:
        dset = build()
        self.assertEqual(len(dset), num_records)
        self.assertEqual(dset._length, (num_records, total_length))
        self.assertEqual(dset.totalLength, total_length)
        self.assertEqual(dset.numRecords, num_records)

        # Clobber the stored counts ...
        dset.totalLength = -1
        dset.numRecords = -1
        self.assertEqual(dset.totalLength, -1)
        self.assertEqual(dset.numRecords, -1)

        # ... and make sure updateCounts() recomputes them.
        dset.updateCounts()
        self.assertEqual(dset.totalLength, total_length)
        self.assertEqual(dset.numRecords, num_records)
def test_len(self):
    """Check len(), totalLength and numRecords for several dataset types.

    Each dataset's counts are compared with known values, overwritten
    with -1, and then expected to be restored by updateCounts().
    """

    def check_counts(dset, num_records, total_length,
                     length_tuple=True, iterate=False):
        # length_tuple: also check the private (numRecords, totalLength) pair.
        # iterate: also count records/bases by iterating over the dataset.
        assert len(dset) == num_records
        if length_tuple:
            assert dset._length == (num_records, total_length)
        assert dset.totalLength == total_length
        assert dset.numRecords == num_records

        dset.totalLength = -1
        dset.numRecords = -1
        assert dset.totalLength == -1
        assert dset.numRecords == -1

        dset.updateCounts()
        assert dset.totalLength == total_length
        assert dset.numRecords == num_records

        if iterate:
            assert sum(1 for _ in dset) == num_records
            assert sum(len(rec) for rec in dset) == total_length

    # AlignmentSet
    check_counts(AlignmentSet(data.getXml(7), strict=True),
                 92, 123588, iterate=True)

    # AlignmentSet with filters
    check_counts(AlignmentSet(data.getXml(14), strict=True),
                 40, 52023)

    # SubreadSet
    check_counts(SubreadSet(data.getXml(9), strict=True),
                 92, 124093, iterate=True)

    # ReferenceSet
    check_counts(ReferenceSet(data.getXml(8), strict=True),
                 59, 85774, length_tuple=False)
def test_len(self):
    """Check len(), totalLength and numRecords for several dataset types.

    Each dataset's counts are compared with known values, overwritten
    with -1, and then expected to be restored by updateCounts().
    """

    def check_counts(dset, num_records, total_length,
                     length_tuple=True, iterate=False):
        # length_tuple: also check the private (numRecords, totalLength) pair.
        # iterate: also count records/bases by iterating over the dataset.
        self.assertEqual(len(dset), num_records)
        if length_tuple:
            self.assertEqual(dset._length, (num_records, total_length))
        self.assertEqual(dset.totalLength, total_length)
        self.assertEqual(dset.numRecords, num_records)

        dset.totalLength = -1
        dset.numRecords = -1
        self.assertEqual(dset.totalLength, -1)
        self.assertEqual(dset.numRecords, -1)

        dset.updateCounts()
        self.assertEqual(dset.totalLength, total_length)
        self.assertEqual(dset.numRecords, num_records)

        if iterate:
            self.assertEqual(sum(1 for _ in dset), num_records)
            self.assertEqual(sum(len(rec) for rec in dset), total_length)

    # AlignmentSet
    check_counts(AlignmentSet(data.getXml(8), strict=True),
                 92, 123588, iterate=True)

    # AlignmentSet with filters
    check_counts(AlignmentSet(data.getXml(15), strict=True),
                 40, 52023)

    # SubreadSet
    check_counts(SubreadSet(data.getXml(10), strict=True),
                 92, 124093, iterate=True)

    # ReferenceSet
    check_counts(ReferenceSet(data.getXml(9), strict=True),
                 59, 85774, length_tuple=False)
def test_updateCounts(self):
    """Compare the dataset's metadata counts with independently derived ones.

    First, without filters, the metadata totals are compared with totals
    accumulated from the dataset's resource readers. Then an rname filter
    is added, updateCounts() is called, and the metadata totals are
    compared with the reads an indexed alignment file reports for the
    full extent of that reference.
    """
    log.info("Testing updateCounts without filters")
    aln = AlignmentSet(data.getBam(0))
    readers = aln.resourceReaders()

    expLen = 0
    for reader in readers:
        for record in reader:
            expLen += record.readLength
            # Each record's alignment bounds must agree with its pbi row.
            self.assertEqual(
                record.aStart, record.bam.pbi[record.rowNumber]['aStart'])
            self.assertEqual(
                record.aEnd, record.bam.pbi[record.rowNumber]['aEnd'])

    expNum = sum(len(reader) for reader in readers)

    accLen = aln.metadata.totalLength
    accNum = aln.metadata.numRecords

    self.assertEqual(expLen, accLen)
    self.assertEqual(expNum, accNum)

    log.info("Testing whether filters are respected")
    aln.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    aln.updateCounts()
    accLen = aln.metadata.totalLength
    accNum = aln.metadata.numRecords

    # Expected values come from a separately opened file, restricted to
    # the whole span of the filtered reference.
    bfile = openIndexedAlignmentFile(data.getBam(0))
    refInfo = bfile.referenceInfo('E.faecalis.1')
    rWin = (refInfo.ID, 0, refInfo.Length)

    # readsInRange is queried once per aggregate, as before, so neither
    # pass depends on the result being re-iterable.
    expNum = sum(1 for _ in bfile.readsInRange(*rWin))
    expLen = sum(read.readLength for read in bfile.readsInRange(*rWin))

    self.assertEqual(expLen, accLen)
    self.assertEqual(expNum, accNum)
def test_updateCounts(self):
    """Compare the dataset's metadata counts with independently derived ones.

    Without filters, the metadata totals must equal the totals gathered
    from the dataset's resource readers. After an rname filter is added
    and updateCounts() is called, they must equal the totals of the reads
    that an indexed alignment file returns for that reference's full range.
    """
    log.info("Testing updateCounts without filters")
    aln = AlignmentSet(data.getBam(0))
    readers = aln.resourceReaders()

    expLen = 0
    for reader in readers:
        for record in reader:
            expLen += record.readLength
            # Sanity check: record bounds must match the pbi index entry.
            pbiRow = record.bam.pbi[record.rowNumber]
            self.assertEqual(record.aStart, pbiRow['aStart'])
            self.assertEqual(record.aEnd, pbiRow['aEnd'])

    expNum = sum(len(reader) for reader in readers)

    accLen = aln.metadata.totalLength
    accNum = aln.metadata.numRecords

    self.assertEqual(expLen, accLen)
    self.assertEqual(expNum, accNum)

    log.info("Testing whether filters are respected")
    aln.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    aln.updateCounts()
    accLen = aln.metadata.totalLength
    accNum = aln.metadata.numRecords

    # Derive the expected values from a separately opened file, using a
    # window that spans the entire filtered reference.
    bfile = openIndexedAlignmentFile(data.getBam(0))
    refInfo = bfile.referenceInfo('E.faecalis.1')
    rWin = (refInfo.ID, 0, refInfo.Length)

    # One readsInRange query per aggregate, mirroring the two passes the
    # test has always made.
    expNum = sum(1 for _ in bfile.readsInRange(*rWin))
    expLen = sum(read.readLength for read in bfile.readsInRange(*rWin))

    self.assertEqual(expLen, accLen)
    self.assertEqual(expNum, accNum)
def test_filter_cli(self):
    """Run ``dataset filter`` on the command line and compare with the API.

    The filtered XML written by the command must carry the same filters,
    totalLength and numRecords as an AlignmentSet that was given the same
    rname requirement in-process and then had its counts updated.
    """
    workdir = tempfile.mkdtemp(suffix="dataset-unittest")
    filtered_xml = os.path.join(workdir, "filtered8.xml")
    log.debug(filtered_xml)

    command = "dataset filter {i} {o} {f}".format(
        i=data.getXml(7),
        o=filtered_xml,
        f="rname=E.faecalis.1",
    )
    self._run_cmd_with_output(command, filtered_xml)

    # Reference result: the same filter applied through the API.
    expected = AlignmentSet(data.getXml(7))
    expected.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    expected.updateCounts()

    # Result produced by the command-line tool.
    actual = AlignmentSet(filtered_xml)

    assert str(expected.filters) == str(actual.filters)
    assert expected.totalLength == actual.totalLength
    assert expected.numRecords == actual.numRecords
def test_filter_cli(self):
    """Run ``dataset filter`` on the command line and compare with the API.

    The command must exit with status 0 and create the output file. The
    resulting dataset must then match an AlignmentSet that received the
    same rname requirement in-process: same filters, totalLength and
    numRecords.
    """
    workdir = tempfile.mkdtemp(suffix="dataset-unittest")
    filtered_xml = os.path.join(workdir, "filtered8.xml")
    log.debug(filtered_xml)

    command = "dataset filter {i} {o} {f}".format(
        i=data.getXml(8),
        o=filtered_xml,
        f="rname=E.faecalis.1",
    )
    log.debug(command)

    _, exit_code, message = backticks(command)
    if exit_code != 0:
        # Log the message returned by backticks before the assertion fails.
        log.debug(message)
    self.assertEqual(exit_code, 0)
    self.assertTrue(os.path.exists(filtered_xml))

    # Reference result: the same filter applied through the API.
    expected = AlignmentSet(data.getXml(8))
    expected.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    expected.updateCounts()

    # Result produced by the command-line tool.
    actual = AlignmentSet(filtered_xml)

    self.assertEqual(str(expected.filters), str(actual.filters))
    self.assertEqual(expected.totalLength, actual.totalLength)
    self.assertEqual(expected.numRecords, actual.numRecords)
def test_filter_cli(self):
    """Check that the ``dataset filter`` command matches in-process filtering.

    Steps: run the command into a fresh temporary directory, require a
    zero exit status and an existing output file, then compare the
    written dataset (filters, totalLength, numRecords) with an
    AlignmentSet filtered through the API.
    """
    tmp_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    out_path = os.path.join(tmp_dir, "filtered8.xml")
    log.debug(out_path)

    cli = "dataset filter {i} {o} {f}".format(
        i=data.getXml(8), o=out_path, f="rname=E.faecalis.1")
    log.debug(cli)

    _, status, msg = backticks(cli)
    if status != 0:
        # Log the message returned by backticks before the assertion fails.
        log.debug(msg)
    self.assertEqual(status, 0)
    self.assertTrue(os.path.exists(out_path))

    # Build the reference dataset by filtering through the API.
    via_api = AlignmentSet(data.getXml(8))
    via_api.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    via_api.updateCounts()

    # Load what the command-line tool wrote.
    via_cli = AlignmentSet(out_path)

    self.assertEqual(str(via_api.filters), str(via_cli.filters))
    self.assertEqual(via_api.totalLength, via_cli.totalLength)
    self.assertEqual(via_api.numRecords, via_cli.numRecords)
def test_len(self):
    """Check len(), totalLength and numRecords for several dataset types.

    Each dataset's counts are compared with known values, overwritten
    with -1, and then expected to be restored by updateCounts().
    """

    def check_counts(dset, num_records, total_length,
                     length_tuple=True, iterate=False):
        # length_tuple: also check the private (numRecords, totalLength) pair.
        # iterate: also count records/bases by iterating over the dataset.
        self.assertEqual(len(dset), num_records)
        if length_tuple:
            self.assertEqual(dset._length, (num_records, total_length))
        self.assertEqual(dset.totalLength, total_length)
        self.assertEqual(dset.numRecords, num_records)

        dset.totalLength = -1
        dset.numRecords = -1
        self.assertEqual(dset.totalLength, -1)
        self.assertEqual(dset.numRecords, -1)

        dset.updateCounts()
        self.assertEqual(dset.totalLength, total_length)
        self.assertEqual(dset.numRecords, num_records)

        if iterate:
            self.assertEqual(sum(1 for _ in dset), num_records)
            self.assertEqual(sum(len(rec) for rec in dset), total_length)

    # AlignmentSet
    check_counts(AlignmentSet(data.getXml(8), strict=True),
                 92, 123588, iterate=True)

    # AlignmentSet with filters
    check_counts(AlignmentSet(data.getXml(15), strict=True),
                 40, 52023)

    # AlignmentSet with cmp.h5
    check_counts(
        AlignmentSet(upstreamData.getBamAndCmpH5()[1], strict=True),
        112, 59970)

    # SubreadSet
    check_counts(SubreadSet(data.getXml(10), strict=True),
                 92, 124093, iterate=True)

    # HdfSubreadSet
    # len means something else in bax/bas land. These numbers may actually
    # be correct...
    check_counts(HdfSubreadSet(data.getXml(17), strict=True),
                 9, 128093)

    # ReferenceSet
    check_counts(ReferenceSet(data.getXml(9), strict=True),
                 59, 85774, length_tuple=False)
def test_len(self):
    """Check len(), totalLength and numRecords for several dataset types.

    Each dataset's counts are compared with known values, overwritten
    with -1, and then expected to be restored by updateCounts(). The
    cmp.h5 AlignmentSet and SubreadSet cases are currently disabled.
    """

    def check_counts(dset, num_records, total_length, length_tuple=True):
        # length_tuple: also check the private (numRecords, totalLength) pair.
        self.assertEqual(len(dset), num_records)
        if length_tuple:
            self.assertEqual(dset._length, (num_records, total_length))
        self.assertEqual(dset.totalLength, total_length)
        self.assertEqual(dset.numRecords, num_records)

        dset.totalLength = -1
        dset.numRecords = -1
        self.assertEqual(dset.totalLength, -1)
        self.assertEqual(dset.numRecords, -1)

        dset.updateCounts()
        self.assertEqual(dset.totalLength, total_length)
        self.assertEqual(dset.numRecords, num_records)

    # AlignmentSet
    check_counts(AlignmentSet(data.getXml(8), strict=True), 92, 123588)

    # AlignmentSet with filters
    check_counts(AlignmentSet(data.getXml(15), strict=True), 40, 52023)

    # NO LONGER SUPPORTED AlignmentSet with cmp.h5
    # (was: AlignmentSet(upstreamData.getCmpH5(), strict=True),
    #  84 records, total length 26103)

    # SubreadSet
    # TODO Turn this back on when pbi's are fixed for subreadsets
    # check_counts(SubreadSet(data.getXml(10), strict=True), 92, 123588)

    # HdfSubreadSet
    # len means something else in bax/bas land. These numbers may actually
    # be correct...
    check_counts(HdfSubreadSet(data.getXml(17), strict=True), 9, 128093)

    # ReferenceSet
    check_counts(ReferenceSet(data.getXml(9), strict=True),
                 59, 85774, length_tuple=False)
def test_len(self):
    """Check len(), totalLength and numRecords for several dataset types.

    For every dataset: compare the counts with known values, overwrite
    them with -1, call updateCounts(), and expect the known values back.
    """

    def verify(dset, n_records, n_bases, has_length_pair=True,
               walk_records=False):
        # has_length_pair: also check the private (numRecords, totalLength)
        # pair. walk_records: also tally records/bases by iteration.
        self.assertEqual(len(dset), n_records)
        if has_length_pair:
            self.assertEqual(dset._length, (n_records, n_bases))
        self.assertEqual(dset.totalLength, n_bases)
        self.assertEqual(dset.numRecords, n_records)

        dset.totalLength = -1
        dset.numRecords = -1
        self.assertEqual(dset.totalLength, -1)
        self.assertEqual(dset.numRecords, -1)

        dset.updateCounts()
        self.assertEqual(dset.totalLength, n_bases)
        self.assertEqual(dset.numRecords, n_records)

        if walk_records:
            self.assertEqual(sum(1 for _ in dset), n_records)
            self.assertEqual(sum(len(rec) for rec in dset), n_bases)

    # AlignmentSet
    verify(AlignmentSet(data.getXml(8), strict=True),
           92, 123588, walk_records=True)

    # AlignmentSet with filters
    verify(AlignmentSet(data.getXml(15), strict=True),
           40, 52023)

    # AlignmentSet with cmp.h5
    verify(AlignmentSet(upstreamData.getBamAndCmpH5()[1], strict=True),
           112, 59970)

    # SubreadSet
    verify(SubreadSet(data.getXml(10), strict=True),
           92, 124093, walk_records=True)

    # HdfSubreadSet
    # len means something else in bax/bas land. These numbers may actually
    # be correct...
    verify(HdfSubreadSet(data.getXml(17), strict=True),
           9, 128093)

    # ReferenceSet
    verify(ReferenceSet(data.getXml(9), strict=True),
           59, 85774, has_length_pair=False)