def test_split_by_contigs_presplit(self):
    """Verify record counts for a dataset already split by contigs.

    The dataset is expected to reference two alignment files and a single
    reference window. Read counts are checked per file, across the whole
    dataset with filters on, and again with filters disabled, to make sure
    the filters are appropriately aggressive.

    Note: consumes too much memory for Jenkins.
    """
    # NOTE(review): another definition with this exact name appears right
    # after this one in the visible source; if both live in the same class
    # the later definition shadows this one -- confirm.

    def tally(items):
        # Counts by iterating, so it works for any iterable, sized or not.
        return sum(1 for _ in items)

    dset = DataSet(data.getXml(14))

    resource_ids = dset.externalResources.resourceIds
    assert len(resource_ids) == 2

    windows = dset.refWindows
    assert windows == [(0, 0, 224992)]
    window = windows[0]

    # The first 7 characters of each resource id are dropped before opening;
    # presumably a "file://"-style prefix -- TODO confirm.
    first_bam = openIndexedAlignmentFile(resource_ids[0][7:])
    second_bam = openIndexedAlignmentFile(resource_ids[1][7:])

    # Per-file counts inside the window, then the combined dataset view.
    assert tally(first_bam.readsInRange(*window)) == 1409
    assert tally(second_bam.readsInRange(*window)) == 1375
    assert tally(dset.readsInRange(*window)) == 2784

    # With filters active, the records match the windowed read count.
    assert tally(dset.records) == 2784

    # Without filters, far more records are visible, and the iterated count
    # agrees with countRecords().
    dset.disableFilters()
    assert tally(dset.records) == 53552
    assert dset.countRecords() == 53552
def test_split_by_contigs_presplit(self):
    """Verify record counts for a dataset already split by contigs.

    The dataset is expected to reference two alignment files and a single
    reference window. Read counts are checked per file, across the whole
    dataset with filters on, and again with filters disabled, to make sure
    the filters are appropriately aggressive.

    Note: consumes too much memory for Jenkins.
    """
    # NOTE(review): another definition with this exact name appears just
    # before this one in the visible source; if both live in the same class
    # this definition shadows the earlier one -- confirm.

    def tally(items):
        # Counts by iterating, so it works for any iterable, sized or not.
        return sum(1 for _ in items)

    dset = DataSet(data.getXml(15))

    resource_ids = dset.externalResources.resourceIds
    self.assertEqual(len(resource_ids), 2)

    windows = dset.refWindows
    self.assertEqual(windows, [(0, 0, 224992)])
    window = windows[0]

    # The first 7 characters of each resource id are dropped before opening;
    # presumably a "file://"-style prefix -- TODO confirm.
    first_bam = openIndexedAlignmentFile(resource_ids[0][7:])
    second_bam = openIndexedAlignmentFile(resource_ids[1][7:])

    # Per-file counts inside the window, then the combined dataset view.
    self.assertEqual(tally(first_bam.readsInRange(*window)), 1409)
    self.assertEqual(tally(second_bam.readsInRange(*window)), 1375)
    self.assertEqual(tally(dset.readsInRange(*window)), 2784)

    # With filters active, the records match the windowed read count.
    self.assertEqual(tally(dset.records), 2784)

    # Without filters, far more records are visible, and the iterated count
    # agrees with countRecords().
    dset.disableFilters()
    self.assertEqual(tally(dset.records), 53552)
    self.assertEqual(dset.countRecords(), 53552)
def test_reads_in_range(self):
    """Check readsInRange counts and name/ID equivalence of reference lookups.

    Two fixed range queries are checked against known read counts. Then, for
    every (name, ID) pair reported by ``refInfo('ID')``, querying by
    reference name must return as many reads as querying by reference ID,
    both for whole-reference and for full-length range queries.
    """
    bam_ds = DataSet(data.getBam())
    ref_name = bam_ds.refNames[15]
    hits = bam_ds.readsInRange(ref_name, 10, 100)
    self.assertEqual(len(list(hits)), 10)

    other_ds = DataSet(data.getBam(0))
    hits = other_ds.readsInRange("E.faecalis.1", 0, 1400)
    self.assertEqual(len(list(hits)), 20)

    ref_lengths = bam_ds.refLengths
    for info_name, ref_id in bam_ds.refInfo('ID'):
        # The ID must map back to the same name that refInfo reported.
        resolved_name = bam_ds._idToRname(ref_id)
        self.assertEqual(info_name, resolved_name)
        ref_len = ref_lengths[resolved_name]

        # Whole-reference query: by name first, then by ID.
        n_by_name = len(list(bam_ds.readsInReference(resolved_name)))
        n_by_id = len(list(bam_ds.readsInReference(ref_id)))
        self.assertEqual(n_by_name, n_by_id)

        # Range query spanning the full reference length: name, then ID.
        n_range_by_name = len(list(bam_ds.readsInRange(resolved_name, 0, ref_len)))
        n_range_by_id = len(list(bam_ds.readsInRange(ref_id, 0, ref_len)))
        self.assertEqual(n_range_by_name, n_range_by_id)
def test_staggered_reads_in_range(self):
    """Check that range queries over a combined dataset interleave reads.

    Two datasets are queried separately and then as one combined dataset
    over the same reference range. The test checks the number of reads from
    each query and the ``tStart`` of every read, in the order returned.
    """
    contig = 'B.vulgatus.5'

    ds_first = DataSet(data.getXml(8))
    # The original fetched refNames here without using the value; the access
    # is kept in case the property has side effects -- TODO confirm whether
    # it can be dropped.
    _unused_ref_names = ds_first.refNames
    first_reads = list(ds_first.readsInRange(contig, 0, 10000))

    ds_second = DataSet(data.getXml(11))
    second_reads = list(ds_second.readsInRange(contig, 0, 10000))

    ds_combined = DataSet(data.getXml(8), data.getXml(11))
    combined_reads = list(ds_combined.readsInRange(contig, 0, 10000))

    self.assertEqual(len(first_reads), 2)
    self.assertEqual(len(second_reads), 5)
    self.assertEqual(len(combined_reads), 7)

    def check_starts(observed, expected_starts):
        # Pairwise comparison; lengths were already asserted above, so zip
        # does not silently truncate either side.
        for aligned_read, expected in zip(observed, expected_starts):
            self.assertEqual(aligned_read.tStart, expected)

    check_starts(first_reads, (0, 1053))
    check_starts(second_reads, (0, 0, 3, 3, 4))
    check_starts(combined_reads, (0, 0, 0, 3, 3, 4, 1053))