def test_shifts(self):
        """Check coverage projection for intervals that overhang the region.

        The region runs from 1000 to 2000 and every interval added here
        starts before it, ends after it, or both.  The assertions expect a
        1000-element coverage array indexed relative to the region start.
        """
        region_lo, region_hi = 1000, 2000

        # One interval spanning the whole region: uniform depth of 1.
        reads = [interval_tree.Interval(500, 2500)]
        depth = summarize_coverage.project_into_region(
            reads, region_lo, region_hi)
        self.assertEqual(numpy.max(depth), 1)
        self.assertEqual(numpy.min(depth), 1)
        self.assertEqual(len(depth), 1000)

        # A second interval ending mid-region doubles the first half only.
        reads.append(interval_tree.Interval(500, 1500))
        depth = summarize_coverage.project_into_region(
            reads, region_lo, region_hi)
        for window, expected in ((slice(None, 500), 2),
                                 (slice(500, None), 1)):
            self.assertTrue(all(depth[window] == expected))

        # A third interval starting near the region end doubles the tail.
        reads.append(interval_tree.Interval(1900, 10000))
        depth = summarize_coverage.project_into_region(
            reads, region_lo, region_hi)
        for window, expected in ((slice(None, 500), 2),
                                 (slice(500, 900), 1),
                                 (slice(900, None), 2)):
            self.assertTrue(all(depth[window] == expected))
    def test_contiguous_gaps(self):
        """Check the gap count and total gap length as intervals are added.

        Over the region 0-100, a single 5-position interval is expected to
        leave two gaps totalling 95 positions; adding a second, disjoint
        5-position interval is expected to leave three gaps totalling 90.
        """
        region_lo, region_hi = 0, 100
        reads = []

        # (interval to add, expected gap count, expected total gap length)
        steps = (
            ((50, 55), 2, 95),
            ((75, 80), 3, 90),
        )
        for (start, stop), want_n_gaps, want_tot_gaps in steps:
            reads.append(interval_tree.Interval(start, stop))
            depth = summarize_coverage.project_into_region(
                reads, region_lo, region_hi)
            n_gaps, tot_gaps = summarize_coverage.get_gaps_from_coverage(
                depth)
            self.assertEqual(n_gaps, want_n_gaps)
            self.assertEqual(tot_gaps, want_tot_gaps)
Ejemplo n.º 3
0
def build_interval_lists(readers):
    """Group alignment intervals by reference id.

    For each reader, walks the parallel ``tId``, ``tStart`` and ``tEnd``
    sequences of ``reader.pbi`` and appends an
    ``interval_tree.Interval(start, end)`` to the list kept for that ``tId``.

    :param readers: iterable of objects exposing a ``pbi`` attribute
    :return: ``defaultdict(list)`` mapping each reference id to its intervals
    """
    by_reference = defaultdict(list)
    for reader in readers:
        index = reader.pbi
        log.debug("{x}".format(x=reader))
        alignments = zip(index.tId, index.tStart, index.tEnd)
        for ref_id, aln_start, aln_end in alignments:
            span = interval_tree.Interval(aln_start, aln_end)
            by_reference[ref_id].append(span)
    n_references = len(by_reference)
    log.debug("Created interval lists for {n} references.".format(
        n=n_references))
    return by_reference
Ejemplo n.º 4
0
    def test_boundaries(self):
        """Check that IntervalTree overlap queries treat bounds as half-open.

        Stored intervals and query ranges are both expected to behave as
        [start, end): an interval ending at 10 must not match a query
        starting at 10, and a query ending at 10 must not match an interval
        starting at 10.
        """
        stored = [
            interval_tree.Interval(5, 10),
            interval_tree.Interval(5, 11),
            interval_tree.Interval(10, 15),
            interval_tree.Interval(20, 25),
        ]
        itree = interval_tree.IntervalTree(stored)

        def expect_overlaps(query_start, query_stop, expected_bounds):
            # find_overlapping reports its hits through the list passed in.
            hits = []
            itree.find_overlapping(query_start, query_stop, hits)
            self.assertEqual(len(hits), len(expected_bounds))
            for hit, (start, stop) in zip(hits, expected_bounds):
                self.assertEqual(hit.start, start)
                self.assertEqual(hit.stop, stop)

        # Query begins exactly where the last interval ends: no overlap.
        expect_overlaps(25, 26, [])
        expect_overlaps(20, 21, [(20, 25)])
        # Query ends at 10, so the interval starting at 10 is excluded.
        expect_overlaps(5, 10, [(5, 10), (5, 11)])
        # Query starts at 10, so the interval ending at 10 is excluded.
        expect_overlaps(10, 11, [(5, 11), (10, 15)])
    def test_adds_half_open(self):
        """Check that stacked intervals sum and that end bounds are excluded.

        Two abutting intervals are expected to give depth 1 at their shared
        boundary, and ten identical intervals a depth of 10; the position
        equal to an interval's end is expected to be uncovered.
        """
        region_lo, region_hi = 0, 100

        reads = [
            interval_tree.Interval(5, 10),
            interval_tree.Interval(10, 15),
        ]
        # Ten copies of the same interval should stack to a depth of ten.
        reads.extend(interval_tree.Interval(30, 40) for _ in range(10))

        depth = summarize_coverage.project_into_region(
            reads, region_lo, region_hi)

        # (position in the coverage array, expected depth)
        expected_depth_at = ((5, 1), (10, 1), (15, 0), (30, 10), (40, 0))
        for position, expected in expected_depth_at:
            self.assertEqual(depth[position], expected)

        self.assertEqual(len(depth), 100)
Ejemplo n.º 6
0
    def test_random_intervals(self):
        """Compare IntervalTree overlap queries against a brute-force scan.

        For each of ``n_cases`` random interval lists, an IntervalTree is
        built and queried with ``n_regions_to_test`` random regions.  Every
        query must return the same number of intervals as a linear scan
        using the half-open overlap rule, and must contain each interval
        that the scan found.
        """
        n_cases = 1000
        full_interval_size = 10000
        n_regions_to_test = 50
        max_intervals = 50

        # range() and floor division behave the same as xrange() and "/" on
        # Python 2 for these integer arguments, and keep the test runnable on
        # Python 3, where xrange is gone and "/" would pass randrange() a
        # float.
        for _ in range(n_cases):
            n_intervals = random.randrange(1, max_intervals)
            interval_list = []
            for _ in range(n_intervals):
                ival_length = random.randrange(full_interval_size // 10)
                ival_start = random.randrange(full_interval_size - ival_length)
                interval_list.append(interval_tree.Interval(
                    ival_start, ival_start + ival_length))

            itree = interval_tree.IntervalTree(interval_list)

            test_region_starts = random.sample(range(full_interval_size),
                                               n_regions_to_test)
            for test_region_start in test_region_starts:
                # The region is kept inside [0, full_interval_size).
                test_region_length = random.randrange(full_interval_size -
                                                      test_region_start)
                test_region_end = test_region_start + test_region_length

                # Reference answer: linear scan with the half-open rule.
                true_overlapping_intervals = [
                    interval for interval in interval_list
                    if (interval.start < test_region_end
                        and test_region_start < interval.stop)
                ]

                itree_overlapping_intervals = []
                itree.find_overlapping(test_region_start, test_region_end,
                                       itree_overlapping_intervals)

                self.assertEqual(len(true_overlapping_intervals),
                                 len(itree_overlapping_intervals))

                for o_ival in true_overlapping_intervals:
                    self.assertIn(o_ival, itree_overlapping_intervals)
    def test_random_gaps(self):
        """Compare gap statistics against a direct scan of random coverage.

        For each of ``n_cases`` random interval lists, coverage is projected
        over ``0`` to ``full_interval_size`` and the values returned by
        ``get_gaps_from_coverage`` are checked against counts derived
        directly from the coverage array: the total gap length is the number
        of zero-depth positions, and the gap count is the number of places
        where a zero-depth run begins.
        """
        n_cases = 25
        full_interval_size = 10000
        max_intervals = 10

        # range() and floor division behave the same as xrange() and "/" on
        # Python 2 for these integer arguments, and keep the test runnable on
        # Python 3, where xrange is gone and "/" would pass randrange() a
        # float.
        for _ in range(n_cases):
            n_intervals = random.randrange(1, max_intervals)
            interval_list = []
            for _ in range(n_intervals):
                ival_length = random.randrange(full_interval_size // 20)
                ival_start = random.randrange(full_interval_size - ival_length)
                interval_list.append(interval_tree.Interval(
                    ival_start, ival_start + ival_length))

            cov_arr = summarize_coverage.project_into_region(
                interval_list, 0, full_interval_size)

            n_gaps, tot_gaps = summarize_coverage.get_gaps_from_coverage(
                cov_arr)

            # Every zero-depth position contributes to the total gap length.
            exp_tot_gaps = sum(1 for depth in cov_arr if depth == 0)

            # A gap begins at each covered -> uncovered transition ...
            exp_n_gaps = sum(
                1 for before, after in zip(cov_arr, cov_arr[1:])
                if before != 0 and after == 0)
            # ... and at the very first position when it is uncovered.
            if cov_arr[0] == 0:
                exp_n_gaps += 1

            self.assertEqual(n_gaps, exp_n_gaps)
            self.assertEqual(tot_gaps, exp_tot_gaps)