def testSharing(self):
    '''a list built with ``share=`` is expected to equal the shared original.'''
    intervals = [(start, start + 10) for start in range(0, 120, 20)]
    original = SegmentList(iter=intervals, normalize=True)
    original.share("/testshare")
    attached = SegmentList(share=original)
    self.assertEqual(original, attached)
def testPickledSharing(self):
    '''a shared list is expected to survive a pickle round trip unchanged.'''
    intervals = [(start, start + 10) for start in range(0, 120, 20)]
    shared = SegmentList(iter=intervals, normalize=True)
    shared.share("/testshare")
    payload = pickle.dumps(shared)
    restored = pickle.loads(payload)
    self.assertEqual(shared, restored)
def testMergeAdjacent(self):
    '''merge(0) on ten abutting 100-wide segments is expected to yield one.'''
    abutting = [(start, start + 100) for start in range(0, 1000, 100)]
    # insertion order should not matter
    random.shuffle(abutting)
    merged = SegmentList(iter=abutting)
    merged.merge(0)
    self.assertEqual(len(merged), 1)
    self.assertEqual(merged.sum(), 1000)
def testToFromIsochores(self):
    '''toIsochores() followed by fromIsochores() should restore the input.'''
    a = self.a.clone()
    orig = a.clone()

    # covering isochores of size 100: highGC blocks start at 0, 200, ...
    # and lowGC blocks start at 100, 300, ... on both contigs
    isochores = IntervalCollection("isochores")
    for contig in ("contig1", "contig2"):
        for label, first in (("highGC", 0), ("lowGC", 100)):
            blocks = SegmentList(
                iter=((x, x + 100) for x in range(first, 10000, 200)),
                normalize=True)
            isochores.add(label, contig, blocks)

    a.toIsochores(isochores)
    self.assertEqual(a.tracks, orig.tracks)
    expected_keys = sorted(['contig2.highGC',
                            'contig1.highGC',
                            'contig2.lowGC',
                            'contig1.lowGC'])
    self.assertEqual(sorted(a["track1"].keys()), expected_keys)

    a.fromIsochores()
    self.check(a, orig)
def testOverlap(self):
    '''subtract self.a from segments (x, x + 10), x = 5, 105, ..., 905.

    The remainder is expected to be (x + 5, x + 10) for each of them.
    '''
    shifted = SegmentList(
        iter=((start, start + 10) for start in range(5, 1000, 100)),
        normalize=True)
    shifted.subtract(self.a)
    remainder = [(start, start + 5) for start in range(10, 1000, 100)]
    self.assertEqual(shifted.asList(), remainder)
def testUnsharing(self):
    '''share()/unshare() can be repeated on one list with the same name.'''
    intervals = [(start, start + 10) for start in range(0, 120, 20)]
    segments = SegmentList(iter=intervals, normalize=True)
    # two full share/unshare cycles under the identical name
    for _ in range(2):
        segments.share("/testshare")
        segments.unshare()
def setUp(self):
    '''build ``self.a`` with 10-wide segments placed every 100 positions.

    track1/contig1 covers [0, 1000), track1/contig2 covers [0, 1000000)
    and track2/contig1 covers [1000, 2000).
    '''
    def every_hundred(first, last):
        # segments (x, x + 10) for x in range(first, last, 100)
        return SegmentList(
            iter=((x, x + 10) for x in range(first, last, 100)),
            normalize=True)

    self.a = collection = IntervalCollection("a")
    collection.add("track1", "contig1", every_hundred(0, 1000))
    collection.add("track1", "contig2", every_hundred(0, 1000000))
    collection.add("track2", "contig1", every_hundred(1000, 2000))
def testInsertionPoint(self):
    '''check insertion point for normalized segment lists'''
    # 10-wide segments starting every 10 positions
    tiled = SegmentList(iter=[(x, x + 10) for x in range(0, 100, 10)],
                        normalize=True)
    for pos in range(100):
        self.assertEqual(tiled.getInsertionPoint(pos, pos + 1), pos // 10)

    # 10-wide segments starting every 20 positions, first one at 0
    gapped = SegmentList(iter=[(x, x + 10) for x in range(0, 100, 20)],
                         normalize=True)
    for pos in range(100):
        found = gapped.getInsertionPoint(pos, pos + 1)
        # from position 90 onwards the list length is expected
        expected = len(gapped) if pos >= 90 else pos // 20
        self.assertEqual(found, expected)

    # 10-wide segments starting every 20 positions, first one at 10
    offset = SegmentList(iter=[(x, x + 10) for x in range(10, 100, 20)],
                         normalize=True)
    for pos in range(100):
        self.assertEqual(offset.getInsertionPoint(pos, pos + 1),
                         (pos - 10) // 20)
def setUp(self):
    '''build ``self.a``: track1 on contig1 and contig2.

    Each contig holds the segments (x, x + 250) for x = 0, 500, 1000, 1500.
    '''
    collection = IntervalCollection("a")
    for contig in ("contig1", "contig2"):
        collection.add(
            "track1", contig,
            SegmentList(iter=((x, x + 250) for x in range(0, 2000, 500)),
                        normalize=True))
    self.a = collection
def testNormalize5(self):
    '''normalize() is expected to drop the empty (0, 0) segment.

    The result must also equal what merge(-1) produces on the same input.
    '''
    intervals = [(489, 589), (1966, 2066), (2786, 2886),
                 (0, 0),
                 (3889, 3972), (3998, 4098), (6441, 6541),
                 (6937, 7054), (7392, 7492), (8154, 8254),
                 (9046, 9146)]
    non_empty = [(start, end) for start, end in intervals if end - start > 0]

    normalized = SegmentList(iter=intervals)
    normalized.normalize()
    self.assertEqual(len(normalized), len(non_empty))
    self.assertEqual(normalized.sum(), 1000)

    merged = SegmentList(iter=intervals)
    merged.merge(-1)
    self.assertEqual(normalized, merged)
def testGetFilledSegmentsFromEnd(self):
    '''check bounds and size of getFilledSegmentsFromEnd() results.

    The list holds (x, x + 10) for x = 0, 20, ..., 100. For every probed
    position a request for 20 must sum to 20, and a request for 100 must
    sum to the total of the list.
    '''
    intervals = [(start, start + 10) for start in range(0, 120, 20)]
    s = SegmentList(iter=intervals, normalize=True)

    for pos in range(0, 120, 5):
        filled = s.getFilledSegmentsFromEnd(pos, 20)
        self.assertEqual(filled.sum(), 20)

        if pos == 0:
            self.assertEqual(filled.max(), s.max())
            self.assertEqual(filled.min(), 80)
        elif pos < 30:
            self.assertEqual(filled.max(), s.max())
            self.assertEqual(filled.min(), s.min())
        elif pos in (40, 60, 80, 100):
            self.assertEqual(filled.min(), pos - 40)
            self.assertEqual(filled.max(), pos - 10)
        elif pos in (30, 50, 70, 90):
            self.assertEqual(filled.min(), pos - 30)
            self.assertEqual(filled.max(), pos)
        elif pos in (45, 65, 85):
            self.assertEqual(filled.min(), pos - 40)
            self.assertEqual(filled.max(), pos)
        elif pos in (35, 55, 75, 95):
            self.assertEqual(filled.min(), pos - 35)
            self.assertEqual(filled.max(), pos - 5)
        # positions 105, 110 and 115 only get the size checks

        self.assertEqual(s.sum(),
                         s.getFilledSegmentsFromEnd(pos, 100).sum())
def testGetFilledSegmentsFromStart(self):
    '''check bounds and size of getFilledSegmentsFromStart() results.

    The list holds (x, x + 10) for x = 0, 20, ..., 100. For every probed
    position a request for 20 must sum to 20, and a request for 100 must
    sum to the total of the list.
    '''
    intervals = [(start, start + 10) for start in range(0, 120, 20)]
    s = SegmentList(iter=intervals, normalize=True)

    for pos in range(0, 120, 5):
        filled = s.getFilledSegmentsFromStart(pos, 20)
        self.assertEqual(filled.sum(), 20)

        if pos >= 110:
            self.assertEqual(filled.min(), s.min())
            self.assertEqual(filled.max(), 30)
        elif pos > 80:
            self.assertEqual(filled.min(), s.min())
            self.assertEqual(filled.max(), s.max())
        elif pos in (0, 20, 40, 60, 80):
            self.assertEqual(filled.min(), pos)
            self.assertEqual(filled.max(), pos + 30)
        elif pos in (10, 30, 50, 70):
            self.assertEqual(filled.min(), pos + 10)
            self.assertEqual(filled.max(), pos + 40)
        elif pos in (5, 25, 45, 65):
            self.assertEqual(filled.min(), pos)
            self.assertEqual(filled.max(), pos + 40)
        elif pos in (15, 35, 55, 75):
            self.assertEqual(filled.min(), pos + 5)
            self.assertEqual(filled.max(), pos + 35)

        self.assertEqual(s.sum(),
                         s.getFilledSegmentsFromStart(pos, 100).sum())
def testExtend(self):
    '''extend() with an equally sized list doubles length and sum.'''
    first = SegmentList(
        iter=[(start, start + 100) for start in range(0, 1000, 100)])
    second = SegmentList(
        iter=[(start, start + 100) for start in range(2000, 3000, 100)])
    first.extend(second)
    self.assertEqual(first.sum(), second.sum() * 2)
    self.assertEqual(len(first), len(second) * 2)
def testNormalize4(self):
    '''multiple interleaved segments are expected to collapse into one.'''
    # 100-wide segments starting every 10 positions
    interleaved = [(start, start + 100) for start in range(0, 1000, 10)]

    normalized = SegmentList()
    for interval in interleaved:
        normalized.add(*interval)
    normalized.normalize()
    self.assertEqual(len(normalized), 1)
    self.assertEqual(normalized.sum(), 1090)

    # merge(-1) on the same input must give the same list
    merged = SegmentList(iter=interleaved)
    merged.merge(-1)
    self.assertEqual(normalized, merged)
def testFromSegments(self):
    '''fromSegmentList() is expected to give one position per segment.'''
    intervals = [(489, 589), (1966, 2066), (2786, 2886),
                 (3889, 3972), (3998, 4098), (6441, 6541),
                 (6937, 7054), (7392, 7492), (8154, 8254),
                 (9046, 9146)]
    segments = SegmentList(iter=intervals)
    positions = PositionList()
    positions.fromSegmentList(segments)
    self.assertEqual(len(segments), len(positions))
def testNormalize1b(self):
    '''non-overlapping segments.'''
    # ten 10-wide segments starting at 100, 200, ..., 1000, in random order
    disjoint = [(start, start + 10) for start in range(100, 1100, 100)]
    random.shuffle(disjoint)

    normalized = SegmentList()
    for interval in disjoint:
        normalized.add(*interval)
    normalized.normalize()
    self.assertEqual(len(normalized), 10)
    self.assertEqual(normalized.sum(), 100)

    # merge(-1) on the same input must give the same list
    merged = SegmentList(iter=disjoint)
    merged.merge(-1)
    self.assertEqual(normalized, merged)
def testInsertionPointNonNormalized(self):
    '''check insertion point for unnormalized segment lists.'''
    # 20-wide segments every 10 positions overlap; left unnormalized
    overlapping = [(start, start + 20) for start in range(0, 100, 10)]
    s = SegmentList(iter=overlapping, normalize=False)
    for pos in range(100):
        with self.assertRaises(AssertionError):
            s.getInsertionPoint(pos, pos + 1)
def testNormalizeEmpty(self):
    '''normalizing an empty list keeps it empty and flags it normalized.'''
    empty = SegmentList()
    self.assertEqual(len(empty), 0)
    empty.normalize()
    self.assertEqual(len(empty), 0)
    self.assertEqual(empty.isNormalized, 1)

    # merge(-1) on an empty list must compare equal as well
    merged = SegmentList()
    merged.merge(-1)
    self.assertEqual(empty, merged)
class TestSegmentListSubtract(GatTest):
    '''tests for SegmentList.subtract().'''

    def setUp(self):
        '''create ``self.a``: segments (x, x + 10) for x = 0, 100, ..., 900.'''
        tens = ((start, start + 10) for start in range(0, 1000, 100))
        self.a = SegmentList(iter=tens, normalize=True)

    def testCompleteOverlap(self):
        '''(0, 1000) minus self.a leaves the gaps (x + 10, x + 100).'''
        whole = SegmentList(iter=[(0, 1000)], normalize=True)
        whole.subtract(self.a)
        gaps = [(start + 10, start + 100) for start in range(0, 1000, 100)]
        self.assertEqual(whole.asList(), gaps)

    def testFullSubtraction(self):
        '''self.a minus (0, 1000) is expected to be empty.'''
        whole = SegmentList(iter=[(0, 1000)], normalize=True)
        self.a.subtract(whole)
        self.assertEqual(len(self.a), 0)

    def testSelfSubtraction(self):
        '''subtracting a list from itself is expected to empty it.'''
        self.a.subtract(self.a)
        self.assertEqual(len(self.a), 0)

    def testSameSubtraction(self):
        '''a clone minus its source is expected to be empty.'''
        duplicate = SegmentList(clone=self.a)
        duplicate.subtract(self.a)
        self.assertEqual(len(duplicate), 0)

    def testOverlap(self):
        '''segments shifted by 5 keep only their part outside self.a.'''
        shifted = SegmentList(
            iter=((start, start + 10) for start in range(5, 1000, 100)),
            normalize=True)
        shifted.subtract(self.a)
        remainder = [(start, start + 5) for start in range(10, 1000, 100)]
        self.assertEqual(shifted.asList(), remainder)

    def testSingleSegmentSubtraction(self):
        '''(0, 12000) minus (0, 10000) is expected to be (10000, 12000).'''
        longer = SegmentList(iter=[(0, 12000)], normalize=True)
        shorter = SegmentList(iter=[(0, 10000)], normalize=True)
        longer.subtract(shorter)
        self.assertEqual(longer.asList(), [(10000, 12000)])
def testCreateAndClear(self):
    '''length goes 0 -> 1 -> 0 across add() and clear().'''
    segments = SegmentList()
    self.assertEqual(0, len(segments))

    segments.add(0, 100)
    self.assertEqual(1, len(segments))

    segments.clear()
    self.assertEqual(0, len(segments))
def testNormalize2(self):
    '''overlapping segments.'''
    # ten 1000-wide segments starting every 100 positions, shuffled
    overlapping = [(start, start + 1000) for start in range(0, 1000, 100)]
    random.shuffle(overlapping)

    normalized = SegmentList()
    for interval in overlapping:
        normalized.add(*interval)
    normalized.normalize()

    self.assertEqual(len(normalized), 1)
    self.assertEqual(normalized.sum(), 1900)
def testOverlap(self):
    '''test if number of segments intersection is correct.'''
    # segments shifted by 5: every one is expected to be counted
    shifted = SegmentList(
        iter=((x, x + 10) for x in range(5, 1000, 100)),
        normalize=True)
    self.assertEqual(self.a.intersectionWithSegments(shifted), len(shifted))
    self.assertEqual(shifted.intersectionWithSegments(self.a), len(shifted))

    # no intersection
    disjoint = SegmentList(
        iter=((x, x + 10) for x in range(10, 1000, 100)),
        normalize=True)
    self.assertEqual(self.a.intersectionWithSegments(disjoint), 0)
    self.assertEqual(disjoint.intersectionWithSegments(self.a), 0)

    # double the number of segments in b
    lower_halves = [(x, x + 5) for x in range(0, 1000, 100)]
    upper_halves = [(x + 5, x + 10) for x in range(0, 1000, 100)]
    halves = SegmentList(iter=lower_halves + upper_halves, normalize=True)
    self.assertEqual(self.a.intersectionWithSegments(halves), 10)
    self.assertEqual(halves.intersectionWithSegments(self.a), 20)
def testTrim(self):
    '''test trimming over full range of insertion points and deletions.'''
    for point in range(1000):
        for size in range(300):
            # trim() modifies the list, so rebuild it for every combination
            intervals = [(x, x + 100) for x in range(0, 1000, 100)]
            s = SegmentList(iter=intervals, normalize=True)
            before = s.sum()
            s.trim(point, size)
            expected = before - size
            message = "trimming error at %i:%i: expected %i, got %i, %s" % (
                point, size, expected, s.sum(), str(s))
            self.assertEqual(expected, s.sum(), message)
def testOverlapWithSegments(self):
    '''count positions hit by 1-wide segments at a range of offsets.'''
    # single point per position
    positions = PositionList(iter=list(range(0, 1000, 100)), sort=True)

    for offset in range(0, 200, 10):
        unit_segments = SegmentList(
            iter=[(x, x + 1) for x in range(0 + offset, 1000 + offset, 100)],
            normalize=True)

        if offset % 100 != 0:
            expected = 0
        elif offset == 100:
            # shifted by one step: the last segment starts at 1000
            expected = 9
        else:
            expected = 10

        self.assertEqual(expected,
                         positions.intersectionWithSegments(unit_segments))
def testIntersect(self):
    '''intersect() keeps only positions hit by the 1-wide segments.'''
    for offset in range(0, 200, 10):
        # single point per position; rebuilt because intersect() modifies it
        positions = PositionList(iter=list(range(0, 1000, 100)), sort=True)
        unit_segments = SegmentList(
            iter=[(x, x + 1) for x in range(0 + offset, 1000 + offset, 100)],
            normalize=True)
        positions.intersect(unit_segments)

        if offset % 100 != 0:
            expected = 0
        elif offset == 100:
            # shifted by one step: the last segment starts at 1000
            expected = 9
        else:
            expected = 10

        self.assertEqual(expected, len(positions))
def testMergeNeighbours(self):
    '''merge(x) is expected to join segments only once x reaches the gap.'''
    for gap in range(5):
        # ten segments of width 100 - gap, i.e. separated by `gap`
        spaced = [(start, start + 100 - gap)
                  for start in range(0, 1000, 100)]
        random.shuffle(spaced)

        for distance in range(gap + 1):
            s = SegmentList(iter=spaced)
            s.merge(distance)
            if distance < gap:
                # too small to bridge the gap: nothing is merged
                expected_len, expected_sum = 10, 1000 - 10 * gap
            else:
                expected_len, expected_sum = 1, 1000 - gap
            self.assertEqual(len(s), expected_len)
            self.assertEqual(s.sum(), expected_sum)
def testDelete(self):
    '''test to track down a memory leak.'''
    # NOTE: the test is switched off by this early return; everything
    # below is unreachable. The original kept commented-out guppy/hpy
    # heap inspection calls here.
    return

    samples = Samples()
    for track in range(self.ntracks):
        track_id = str(track)
        for sample in range(self.nsamples):
            sample_id = str(sample)
            for isochore in range(self.nisochores):
                isochore_id = str(isochore)
                allocated = SegmentList(allocate=self.nsegments)
                samples.add(track_id, sample_id, isochore_id, allocated)
        # drop the whole track again once it has been filled
        del samples[track_id]
def testIntersectionCopy(self):
    '''a clone intersected with its source is expected to equal the source.'''
    duplicate = SegmentList(clone=self.a)
    duplicate.intersect(self.a)
    self.assertEqual(duplicate.asList(), self.a.asList())
def testPartialIntersection(self):
    '''segments shifted by 5 keep their count but half of self.a's sum.'''
    shifted = SegmentList(
        iter=((start, start + 10) for start in range(5, 1000, 100)),
        normalize=True)
    shifted.intersect(self.a)
    self.assertEqual(len(shifted), len(self.a))
    self.assertEqual(shifted.sum(), self.a.sum() / 2)
def testIntersectionFull(self):
    '''(0, 1000) intersected with self.a is expected to equal self.a.'''
    whole = SegmentList(iter=[(0, 1000)], normalize=True)
    whole.intersect(self.a)
    self.assertEqual(whole.asList(), self.a.asList())
def testFilter(self):
    '''filter() is expected to keep whole segments, not clipped pieces.'''
    # 5-wide segments at 500, 600, ..., 1900 filtered against self.a
    candidates = SegmentList(
        iter=((x, x + 5) for x in range(500, 2000, 100)),
        normalize=True)
    candidates.filter(self.a)
    kept = [(x, x + 5) for x in range(500, 1000, 100)]
    self.assertEqual(candidates.asList(), kept)

    # a single segment is expected back unchanged for either filter list
    for mask in ([(0, 50), (75, 125)], [(0, 10)]):
        single = SegmentList(iter=((0, 56), ))
        other = SegmentList(iter=mask)
        single.filter(other)
        self.assertEqual(single.asList(), [(0, 56)])
def setUp(self):
    '''create ``self.a``: segments (x, x + 10) for x = 0, 100, ..., 900.'''
    tens = ((start, start + 10) for start in range(0, 1000, 100))
    self.a = SegmentList(iter=tens, normalize=True)
def testCaching(self):
    '''write samples to a cache file, reload them and compare.'''
    # NOTE: the test is switched off by this early return; everything
    # below is unreachable.
    return

    workspaces = IntervalCollection("workspace")
    segments = IntervalCollection("segment")
    annotations = IntervalCollection("annotation")

    workspaces.add("default", "chr1",
                   SegmentList(iter=[(0, self.workspace_size), ],
                               normalize=True))
    workspace = workspaces["default"]
    segments.add("default", "chr1",
                 SegmentList(iter=[(0, 1), ], normalize=True))

    # annotations: a collection of segments with increasing density
    # all are overlapping the segments
    for width in range(1, 100, 2):
        annotations.add("%03i" % width, "chr1",
                        SegmentList(iter=[(0, width), ], normalize=True))

    workspace_size = workspace["chr1"].sum()

    sampler = SamplerAnnotator(bucket_size=1, nbuckets=self.workspace_size)

    def sample_keys():
        # every (track, sample number, isochore) combination, plus the
        # per-track segments the isochore is looked up in
        for track in segments.tracks:
            segs = segments[track]
            for x in range(self.sample_size):
                for isochore in list(segs.keys()):
                    yield track, x, isochore, segs

    # start from a clean cache file
    if os.path.exists("test.cache"):
        os.remove("test.cache")

    outsamples = SamplesCached("test.cache")
    saved_samples = {}
    for track, x, isochore, segs in sample_keys():
        r = sampler.sample(segs[isochore], workspace[isochore])
        saved_samples[(track, x, isochore)] = r
        outsamples.add(track, x, isochore, r)

    del outsamples

    insamples = SamplesCached("test.cache")
    for track, x, isochore, segs in sample_keys():
        insamples.load(track, x, isochore)

    # compare
    for track, x, isochore, segs in sample_keys():
        self.assertEqual(saved_samples[(track, x, isochore)].asList(),
                         insamples[track][x][isochore].asList())

    # remove the cache file and its index
    for leftover in ("test.cache", "test.cache.idx"):
        if os.path.exists(leftover):
            os.remove(leftover)
def testCompleteOverlap(self):
    '''(0, 1000) minus self.a is expected to leave (x + 10, x + 100) gaps.'''
    whole = SegmentList(iter=[(0, 1000)], normalize=True)
    whole.subtract(self.a)
    gaps = [(start + 10, start + 100) for start in range(0, 1000, 100)]
    self.assertEqual(whole.asList(), gaps)
'''test script to check memory usage in multiprocessing.'''

import multiprocessing
from gat.SegmentList import SegmentList

# the second assignment overrides the first, so 10,000,000 is the
# effective value
nsegments = 100000000
nsegments = 10000000
ncpu = 2
# NOTE(review): nwork is not used in the visible part of the script
nwork = 100

# 1-wide segments at every even position below nsegments
s = SegmentList(iter=[(x, x + 1)
                      for x in range(0, nsegments, 2)], normalize=True)

print("built list")
# blocks until return is pressed - presumably so memory usage can be
# inspected at this point; the value read is not used
r = input("press return")
s.share("/test")
print("shared data")
r = input("press return")


def dowork(segs):
    # spins forever, so the return below is never reached
    while 1:
        pass
    return segs.sum()

# NOTE(review): the pool is created at import time without an
# ``if __name__ == "__main__"`` guard - confirm this is only ever run
# with the fork start method
p = multiprocessing.Pool(ncpu)

print("starting mp")
def testNormalizeEmptySegment(self):
    '''empty (0, 0) segments are expected to vanish on normalize().'''
    # one or several empty segments: the result is an empty list
    for empties in ([(0, 0), ], [(0, 0), (0, 0)]):
        s = SegmentList(iter=empties)
        s.normalize()
        self.assertEqual(s.isNormalized, 1)
        self.assertEqual(len(s), 0)

    # (0, 0) followed by (0, 1) ... (0, 9): one segment is expected
    nested = [(0, end) for end in range(10)]
    s = SegmentList(iter=nested)
    s.normalize()
    self.assertEqual(s.isNormalized, 1)
    self.assertEqual(len(s), 1)

    # merge(-1) on the same input must give the same list
    merged = SegmentList(iter=nested)
    merged.merge(-1)
    self.assertEqual(s, merged)
def testNoIntersection(self):
    '''segments starting at x + 10 are expected not to intersect self.a.'''
    disjoint = SegmentList(
        iter=((start, start + 10) for start in range(10, 1000, 100)),
        normalize=True)
    disjoint.intersect(self.a)
    self.assertEqual(disjoint.asList(), [])
    self.assertEqual(disjoint.isEmpty, True)
def testSingleSegmentSubtraction(self):
    '''(0, 12000) minus (0, 10000) is expected to be (10000, 12000).'''
    longer = SegmentList(iter=[(0, 12000)], normalize=True)
    shorter = SegmentList(iter=[(0, 10000)], normalize=True)
    longer.subtract(shorter)
    self.assertEqual(longer.asList(), [(10000, 12000)])
class TestSegmentListIntersection(GatTest):
    '''tests for SegmentList.intersect(), intersectionWithSegments()
    and filter().'''

    def setUp(self):
        '''create ``self.a``: segments (x, x + 10) for x = 0, 100, ..., 900.'''
        self.a = SegmentList(
            iter=((x, x + 10) for x in range(0, 1000, 100)),
            normalize=True)

    def testIntersectionFull(self):
        '''(0, 1000) intersected with self.a is expected to equal self.a.'''
        b = SegmentList(iter=[(0, 1000)], normalize=True)
        b.intersect(self.a)
        self.assertEqual(b.asList(), self.a.asList())

    def testIntersectionSelf(self):
        '''intersecting a list with itself must leave it unchanged.'''
        # Snapshot the contents first: comparing self.a.asList() with
        # itself after the call could never fail.
        expected = self.a.asList()
        self.a.intersect(self.a)
        self.assertEqual(self.a.asList(), expected)

    def testIntersectionCopy(self):
        '''a clone intersected with its source is expected to equal it.'''
        b = SegmentList(clone=self.a)
        b.intersect(self.a)
        self.assertEqual(b.asList(), self.a.asList())

    def testNoIntersection(self):
        '''segments starting at x + 10 do not overlap self.a.'''
        b = SegmentList(iter=((x, x + 10) for x in range(10, 1000, 100)),
                        normalize=True)
        b.intersect(self.a)
        self.assertEqual(b.asList(), [])
        self.assertEqual(b.isEmpty, True)

    def testPartialIntersection(self):
        '''segments shifted by 5 keep their count but half of the sum.'''
        b = SegmentList(iter=((x, x + 10) for x in range(5, 1000, 100)),
                        normalize=True)
        b.intersect(self.a)
        self.assertEqual(len(b), len(self.a))
        self.assertEqual(b.sum(), self.a.sum() / 2)

    def testOverlap(self):
        '''test if number of segments intersection is correct.'''
        b = SegmentList(iter=((x, x + 10) for x in range(5, 1000, 100)),
                        normalize=True)
        self.assertEqual(self.a.intersectionWithSegments(b), len(b))
        self.assertEqual(b.intersectionWithSegments(self.a), len(b))

        # no intersection
        b = SegmentList(iter=((x, x + 10) for x in range(10, 1000, 100)),
                        normalize=True)
        self.assertEqual(self.a.intersectionWithSegments(b), 0)
        self.assertEqual(b.intersectionWithSegments(self.a), 0)

        # double the number of segments in b
        b = SegmentList(iter=[(x, x + 5) for x in range(0, 1000, 100)] +
                        [(x + 5, x + 10) for x in range(0, 1000, 100)],
                        normalize=True)
        self.assertEqual(self.a.intersectionWithSegments(b), 10)
        self.assertEqual(b.intersectionWithSegments(self.a), 20)

    def testFilter(self):
        '''filter() is expected to keep whole segments, not clipped pieces.'''
        # 5-wide segments at 500, 600, ..., 1900 filtered against self.a
        b = SegmentList(iter=((x, x + 5) for x in range(500, 2000, 100)),
                        normalize=True)
        b.filter(self.a)
        self.assertEqual(b.asList(), [
            (500, 505), (600, 605), (700, 705), (800, 805), (900, 905)])

        # a single segment is expected back unchanged in both cases below
        b = SegmentList(iter=((0, 56), ))
        c = SegmentList(iter=[(0, 50), (75, 125)])
        b.filter(c)
        self.assertEqual(b.asList(), [(0, 56)])

        b = SegmentList(iter=((0, 56), ))
        c = SegmentList(iter=[(0, 10)])
        b.filter(c)
        self.assertEqual(b.asList(), [(0, 56)])
def testSameSubtraction(self):
    '''a clone minus its source is expected to be empty.'''
    duplicate = SegmentList(clone=self.a)
    duplicate.subtract(self.a)
    self.assertEqual(len(duplicate), 0)