def test_shifts(self):
    """Check that interval coordinates are translated relative to the
    region bounds when projected into a coverage array.
    """
    start, end = 1000, 2000

    # A single interval spanning the whole region: uniform coverage of 1
    # over an array as long as the region.
    ivals = [interval_tree.Interval(500, 2500)]
    coverage = summarize_coverage.project_into_region(ivals, start, end)
    self.assertEqual(numpy.max(coverage), 1)
    self.assertEqual(numpy.min(coverage), 1)
    self.assertEqual(len(coverage), 1000)

    # A second interval that stops mid-region doubles the first half only.
    ivals.append(interval_tree.Interval(500, 1500))
    coverage = summarize_coverage.project_into_region(ivals, start, end)
    self.assertTrue(all(coverage[:500] == 2))
    self.assertTrue(all(coverage[500:] == 1))

    # A third interval starting near the region end doubles the tail.
    ivals.append(interval_tree.Interval(1900, 10000))
    coverage = summarize_coverage.project_into_region(ivals, start, end)
    self.assertTrue(all(coverage[:500] == 2))
    self.assertTrue(all(coverage[500:900] == 1))
    self.assertTrue(all(coverage[900:] == 2))
def test_contiguous_gaps(self):
    """Check the number and total size of contiguous zero-coverage runs
    reported for a coverage array.
    """
    start, end = 0, 100

    # One covered stretch [50, 55): a gap on each side, 95 uncovered bases.
    ivals = [interval_tree.Interval(50, 55)]
    coverage = summarize_coverage.project_into_region(ivals, start, end)
    gap_count, gap_total = summarize_coverage.get_gaps_from_coverage(coverage)
    self.assertEqual(gap_count, 2)
    self.assertEqual(gap_total, 95)

    # A second covered stretch [75, 80) splits the right-hand gap in two.
    ivals.append(interval_tree.Interval(75, 80))
    coverage = summarize_coverage.project_into_region(ivals, start, end)
    gap_count, gap_total = summarize_coverage.get_gaps_from_coverage(coverage)
    self.assertEqual(gap_count, 3)
    self.assertEqual(gap_total, 90)
def build_interval_lists(readers):
    """Group alignment intervals by reference id.

    For every reader, walks its ``pbi`` index and, for each
    ``(tId, tStart, tEnd)`` triple, stores
    ``interval_tree.Interval(tStart, tEnd)`` in the list kept for that
    ``tId``.

    :param readers: iterable of objects exposing a ``pbi`` attribute with
        parallel ``tId``, ``tStart`` and ``tEnd`` sequences.
    :returns: a ``defaultdict(list)`` mapping each reference id to the
        list of intervals collected for it.
    """
    make_interval = interval_tree.Interval
    by_reference = defaultdict(list)  # reference group id -> [Interval, ...]

    for reader in readers:
        index = reader.pbi
        log.debug("{x}".format(x=reader))
        triples = zip(index.tId, index.tStart, index.tEnd)
        for ref_id, aln_start, aln_end in triples:
            by_reference[ref_id].append(make_interval(aln_start, aln_end))

    n_references = len(by_reference)
    log.debug("Created interval lists for {n} references.".format(
        n=n_references))
    return by_reference
def test_boundaries(self):
    """Check that IntervalTree queries treat intervals as the usual
    half-open [start, end) ranges.
    """
    tree = interval_tree.IntervalTree([
        interval_tree.Interval(5, 10),
        interval_tree.Interval(5, 11),
        interval_tree.Interval(10, 15),
        interval_tree.Interval(20, 25),
    ])

    def check(query_start, query_stop, expected):
        # Query the tree and compare the hits, in order, against the
        # expected (start, stop) pairs.
        hits = []
        tree.find_overlapping(query_start, query_stop, hits)
        self.assertEqual(len(hits), len(expected))
        for hit, (exp_start, exp_stop) in zip(hits, expected):
            self.assertEqual(hit.start, exp_start)
            self.assertEqual(hit.stop, exp_stop)

    # The end coordinate is exclusive: nothing covers position 25.
    check(25, 26, [])
    # The start coordinate is inclusive.
    check(20, 21, [(20, 25)])
    # A query ending at 10 must not pick up the interval starting at 10.
    check(5, 10, [(5, 10), (5, 11)])
    # A query starting at 10 must not pick up the interval ending at 10.
    check(10, 11, [(5, 11), (10, 15)])
def test_adds_half_open(self):
    """Check that stacked intervals sum correctly in the coverage array
    and that interval ends are exclusive.
    """
    start, end = 0, 100

    # Two abutting intervals followed by ten identical copies of [30, 40).
    ivals = [
        interval_tree.Interval(5, 10),
        interval_tree.Interval(10, 15),
    ]
    ivals.extend(interval_tree.Interval(30, 40) for _ in range(10))

    coverage = summarize_coverage.project_into_region(ivals, start, end)

    self.assertEqual(coverage[5], 1)
    # Position 10 belongs only to the second interval, so depth stays 1.
    self.assertEqual(coverage[10], 1)
    self.assertEqual(coverage[15], 0)
    self.assertEqual(coverage[30], 10)
    self.assertEqual(coverage[40], 0)
    self.assertEqual(len(coverage), 100)
def test_random_intervals(self):
    """Compare IntervalTree overlap queries against a brute-force scan
    on randomly generated interval lists.

    For each case a random list of intervals is built and loaded into an
    IntervalTree. A number of random query regions are then checked: the
    intervals returned by ``find_overlapping`` must match, in count and
    membership, those found by testing every interval with the half-open
    overlap condition.
    """
    n_cases = 1000
    full_interval_size = 10000
    n_regions_to_test = 50
    max_intervals = 50
    # Floor division keeps this an int on both Python 2 and 3; true
    # division would hand random.randrange() a float on Python 3.
    max_ival_length = full_interval_size // 10

    for _ in range(n_cases):
        n_intervals = random.randrange(1, max_intervals)
        interval_list = []
        for _ in range(n_intervals):
            ival_length = random.randrange(max_ival_length)
            ival_start = random.randrange(full_interval_size - ival_length)
            ival_end = ival_start + ival_length
            interval_list.append(
                interval_tree.Interval(ival_start, ival_end))
        itree = interval_tree.IntervalTree(interval_list)

        test_region_starts = random.sample(range(full_interval_size),
                                           n_regions_to_test)
        for test_region_start in test_region_starts:
            test_region_length = random.randrange(
                full_interval_size - test_region_start)
            test_region_end = test_region_start + test_region_length

            # Brute-force reference: half-open overlap test on every
            # interval.
            true_overlapping_intervals = [
                interval for interval in interval_list
                if (interval.start < test_region_end
                    and test_region_start < interval.stop)
            ]

            itree_overlapping_intervals = []
            itree.find_overlapping(test_region_start, test_region_end,
                                   itree_overlapping_intervals)

            self.assertEqual(len(true_overlapping_intervals),
                             len(itree_overlapping_intervals))
            for o_ival in true_overlapping_intervals:
                self.assertIn(o_ival, itree_overlapping_intervals)
def test_random_gaps(self):
    """Check contiguous gap counting on coverage arrays built from random
    interval lists.

    The expected values are computed by a direct scan of the coverage
    array: the total gap size is the number of zero-coverage positions,
    and the gap count is the number of covered-to-uncovered transitions,
    plus one if the array starts uncovered.
    """
    n_cases = 25
    full_interval_size = 10000
    max_intervals = 10
    # Floor division keeps this an int on both Python 2 and 3; true
    # division would hand random.randrange() a float on Python 3.
    max_ival_length = full_interval_size // 20

    for _ in range(n_cases):
        n_intervals = random.randrange(1, max_intervals)
        interval_list = []
        for _ in range(n_intervals):
            ival_length = random.randrange(max_ival_length)
            ival_start = random.randrange(full_interval_size - ival_length)
            ival_end = ival_start + ival_length
            interval_list.append(
                interval_tree.Interval(ival_start, ival_end))

        cov_arr = summarize_coverage.project_into_region(
            interval_list, 0, full_interval_size)
        n_gaps, tot_gaps = summarize_coverage.get_gaps_from_coverage(
            cov_arr)

        exp_n_gaps, exp_tot_gaps = 0, 0
        # Scan every position except the last, looking one step ahead to
        # detect the start of a new gap.
        for i in range(len(cov_arr) - 1):
            if cov_arr[i] == 0:
                exp_tot_gaps += 1
            if cov_arr[i] != 0 and cov_arr[i + 1] == 0:
                exp_n_gaps += 1
        # The final position is not visited by the loop above.
        if cov_arr[-1] == 0:
            exp_tot_gaps += 1
        # A gap at the very start has no covered position before it.
        if cov_arr[0] == 0:
            exp_n_gaps += 1

        self.assertEqual(n_gaps, exp_n_gaps)
        self.assertEqual(tot_gaps, exp_tot_gaps)