Esempio n. 1
0
 def test_upper_bound_is_none(self):
     '''An unbounded ``IntervalMetadata`` accepts any interval, but cannot be
     reversed or concatenated.'''
     unbounded = IntervalMetadata(None)
     # With no upper bound, adding a very large interval must not raise.
     unbounded.add([(0, 1000000000)])
     self.assertIsNone(unbounded.upper_bound)

     # Both operations are expected to fail with the same TypeError message.
     expected_msg = 'upper bound is `None`'
     with self.assertRaisesRegex(TypeError, expected_msg):
         unbounded._reverse()
     with self.assertRaisesRegex(TypeError, expected_msg):
         IntervalMetadata.concat([self.im_1, unbounded])
Esempio n. 2
0
 def test_upper_bound_is_none(self):
     '''Check the behaviour of an ``IntervalMetadata`` built with a ``None``
     upper bound.'''
     no_bound_im = IntervalMetadata(None)
     # Adding an interval far beyond any fixture bound should be accepted.
     no_bound_im.add([(0, 1000000000)])
     self.assertIsNone(no_bound_im.upper_bound)

     msg_re = r'upper bound is `None`'
     # Reversing is expected to raise TypeError when there is no bound.
     with self.assertRaisesRegex(TypeError, msg_re):
         no_bound_im._reverse()
     # Concatenating with an unbounded member is expected to raise too.
     with self.assertRaisesRegex(TypeError, msg_re):
         IntervalMetadata.concat([self.im_1, no_bound_im])
Esempio n. 3
0
class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin):
    '''Tests for ``IntervalMetadata`` and its interaction with ``Interval``.'''

    def setUp(self):
        '''Build the fixtures shared by the tests.

        All three objects use ``upper_bound`` 10: ``im_empty`` has no
        features, ``im_1`` has one (``im_1_1``) and ``im_2`` has two
        (``im_2_1`` and ``im_2_2``).
        '''
        self.upper_bound = 10
        self.im_empty = IntervalMetadata(self.upper_bound)
        self.im_1 = IntervalMetadata(self.upper_bound)
        # Constructing an Interval with ``interval_metadata=`` also registers
        # it on that object (test_num_interval_features relies on this).
        self.im_1_1 = Interval(interval_metadata=self.im_1,
                               bounds=[(1, 2), (4, self.upper_bound)],
                               metadata={'gene': 'sagA', 'bound': 0})
        self.im_2 = IntervalMetadata(self.upper_bound)
        self.im_2_1 = Interval(interval_metadata=self.im_2,
                               bounds=[(1, 2), (4, self.upper_bound)],
                               metadata={'gene': 'sagA', 'bound': 0})
        self.im_2_2 = Interval(interval_metadata=self.im_2,
                               bounds=[(3, 5)],
                               metadata={'gene': 'sagB', 'bound': 0,
                                         'spam': [0]})

    def _assert_shallow_copy(self, obs, orig):
        '''Assert that ``obs`` is a shallow copy of ``orig``.

        The interval list, the interval tree and every ``Interval`` (with
        its bounds, fuzzy and metadata containers) must be distinct objects,
        while the individual metadata values must still be shared.
        '''
        self.assertIsNot(obs._intervals, orig._intervals)
        self.assertIsNot(obs._interval_tree, orig._interval_tree)
        for i in range(orig.num_interval_features):
            i1, i2 = obs._intervals[i], orig._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1._interval_metadata, i2._interval_metadata)
            self.assertIsNot(i1.metadata, i2.metadata)
            # Shallow: the values stored in the metadata are the same objects.
            for k in i1.metadata:
                self.assertIs(i1.metadata[k], i2.metadata[k])

    def test_copy_empty(self):
        '''``copy`` of an empty object is equal but has its own containers.'''
        obs = copy(self.im_empty)
        self.assertEqual(obs, self.im_empty)
        self.assertIsNot(obs._intervals, self.im_empty._intervals)
        self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_copy(self):
        '''``copy`` produces an equal, shallow copy.'''
        obs = copy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self._assert_shallow_copy(obs, self.im_2)

    def test_deepcopy(self):
        '''``deepcopy`` produces an equal copy sharing no mutable state.'''
        obs = deepcopy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for i in range(self.im_2.num_interval_features):
            i1, i2 = obs._intervals[i], self.im_2._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1.metadata, i2.metadata)

        # i1/i2 are the last pair visited by the loop above (the 'sagB'
        # feature). Mutating the original's nested list must not leak into
        # the deep copy.
        i2.metadata['spam'].append(1)
        self.assertEqual(i2.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]})
        self.assertEqual(i1.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_deepcopy_memo_is_respected(self):
        '''``deepcopy`` records the objects it copied in the given memo.'''
        memo = {}
        deepcopy(self.im_1, memo)
        self.assertGreater(len(memo), 2)

    def test_init(self):
        '''A new object has no intervals and a tree that is not stale.'''
        self.assertFalse(self.im_empty._is_stale_tree)
        self.assertEqual(self.im_empty._intervals, [])

    def test_init_upper_bound_lt_lower_bound(self):
        '''An upper bound of 0 is accepted; a negative one is rejected.'''
        # test that no exception is raised
        IntervalMetadata(0)

        with self.assertRaises(ValueError):
            IntervalMetadata(-1)

    def test_upper_bound_is_none(self):
        '''An unbounded object accepts any interval but cannot be reversed
        or concatenated.'''
        im = IntervalMetadata(None)
        # should not raise error
        im.add([(0, 1000000000)])
        self.assertIsNone(im.upper_bound)
        with self.assertRaisesRegex(TypeError, r'upper bound is `None`'):
            im._reverse()
        with self.assertRaisesRegex(TypeError, r'upper bound is `None`'):
            IntervalMetadata.concat([self.im_1, im])

    def test_init_copy_from(self):
        '''Constructing from another object copies its features under the
        new upper bound.'''
        for i in [None, 99, 999]:
            obs = IntervalMetadata(i, self.im_1)
            exp = IntervalMetadata(i)
            exp.add(bounds=[(1, 2), (4, self.upper_bound)],
                    metadata={'gene': 'sagA', 'bound': 0})
            self.assertEqual(obs, exp)

    def test_init_copy_from_empty(self):
        '''Constructing from an empty object yields an empty object with its
        own containers.'''
        for i in [None, 0, 9, 99, 999]:
            obs = IntervalMetadata(i, self.im_empty)
            exp = IntervalMetadata(i)
            self.assertEqual(obs, exp)
            # test it is shallow copy
            self.assertIsNot(obs._intervals, self.im_empty._intervals)
            self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_init_copy_from_shallow_copy(self):
        '''Constructing from another object makes a shallow copy of it.'''
        obs = IntervalMetadata(self.upper_bound, self.im_2)
        self.assertEqual(self.im_2, obs)
        # test it is shallow copy
        self._assert_shallow_copy(obs, self.im_2)

    def test_init_copy_from_error(self):
        '''Copying features that exceed the new upper bound raises.'''
        i = self.upper_bound - 1
        with self.assertRaisesRegex(ValueError,
                                    r'larger than upper bound \(%r\)' % i):
            IntervalMetadata(i, self.im_2)

    def test_num_interval_features(self):
        '''``num_interval_features`` reports the number of features.'''
        self.assertEqual(self.im_empty.num_interval_features, 0)
        self.assertEqual(self.im_1.num_interval_features, 1)
        self.assertEqual(self.im_2.num_interval_features, 2)

    def test_duplicate(self):
        '''Test query and drop methods on duplicate Intervals.'''
        intvl_1 = self.im_empty.add([(1, 2)])
        intvl_2 = self.im_empty.add([(1, 2)])
        self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2)
        self.im_empty.drop([intvl_1])
        self.assertEqual(len(self.im_empty._intervals), 1)
        # Only the exact object passed to drop() is removed.
        self.assertIs(self.im_empty._intervals[0], intvl_2)

    def test_duplicate_bounds(self):
        '''A feature with repeated bounds is returned once by ``query``.'''
        intvl = self.im_empty.add([(1, 2), (1, 2)])
        intvls = list(self.im_empty.query([(1, 2)]))
        self.assertEqual(len(intvls), 1)
        self.assertIs(intvl, intvls[0])

    def test_concat_empty(self):
        '''Concatenating empty objects sums their upper bounds.'''
        for i in 0, 1, 2:
            obs = IntervalMetadata.concat([self.im_empty] * i)
            exp = IntervalMetadata(self.upper_bound * i)
            self.assertEqual(obs, exp)

        obs = IntervalMetadata.concat([])
        self.assertEqual(obs, IntervalMetadata(0))

    def test_concat(self):
        '''Concatenation shifts each object's bounds by the upper bounds of
        the objects preceding it.'''
        im1 = IntervalMetadata(3)
        im2 = IntervalMetadata(4)
        im3 = IntervalMetadata(5)
        im1.add([(0, 2)], [(True, True)])
        im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'})
        im2.add([(2, 4)], metadata={'gene': 'sagB'})
        im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'})
        obs = IntervalMetadata.concat([im1, im2, im3])

        exp = IntervalMetadata(12)
        exp.add(bounds=[(0, 2)], fuzzy=[(True, True)])
        exp.add(bounds=[(3, 6)],
                fuzzy=[(True, False)],
                metadata={'gene': 'sagA'})
        exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'})
        exp.add(bounds=[(8, 12)],
                fuzzy=[(False, True)],
                metadata={'gene': 'sagC'})
        self.assertEqual(obs, exp)

    def test_merge(self):
        '''``merge`` adds the other object's features in place.'''
        # empty + empty
        im = IntervalMetadata(self.upper_bound)
        self.im_empty.merge(im)
        self.assertEqual(self.im_empty, im)
        # empty + non-empty
        self.im_empty.merge(self.im_1)
        self.assertEqual(self.im_empty, self.im_1)
        # non-empty + non-empty
        self.im_empty.merge(self.im_2)
        self.im_2.merge(self.im_1)
        self.assertEqual(self.im_empty, self.im_2)

    def test_merge_unequal_upper_bounds(self):
        '''Merging objects with different upper bounds raises.'''
        n = 3
        im1 = IntervalMetadata(n)
        for im in [self.im_empty, self.im_1]:
            with self.assertRaisesRegex(
                    ValueError,
                    r'not equal \(%d != %d\)' % (self.upper_bound, n)):
                im.merge(im1)

    def test_merge_to_unbounded(self):
        '''Anything can be merged into an unbounded object, which stays
        unbounded.'''
        for im in [self.im_empty, self.im_1, IntervalMetadata(None)]:
            obs = IntervalMetadata(None)
            obs.merge(im)
            self.assertIsNone(obs.upper_bound)
            self.assertEqual(obs._intervals, im._intervals)

    def test_merge_unbounded_to_bounded(self):
        '''Merging an unbounded object into a bounded one raises and leaves
        the unbounded object untouched.'''
        im = IntervalMetadata(None)
        with self.assertRaisesRegex(
                ValueError, 'Cannot merge an unbound IntervalMetadata object '
                'to a bounded one'):
            self.im_1.merge(im)
        # original im is not changed
        self.assertIsNone(im.upper_bound)
        self.assertEqual(im._intervals, [])

    def test_sort(self):
        '''``sort`` reorders the features without changing equality.'''
        # Adds a third feature to im_2 so the ordering is non-trivial.
        interval = Interval(self.im_2, [(1, 2), (3, 8)],
                            metadata={'gene': 'sagA', 'bound': 0})
        im = deepcopy(self.im_2)
        self.im_2.sort(False)
        # check sorting does not have other side effects
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [self.im_2_2, self.im_2_1, interval])

        self.im_2.sort()
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [interval, self.im_2_1, self.im_2_2])

        self.im_empty.sort()
        self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound))

    def test_add_eq_upper_bound(self):
        '''A feature ending exactly at the upper bound can be added.'''
        self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)],
                          metadata={'gene': 'sagA', 'bound': 0})
        self.assertTrue(self.im_empty._is_stale_tree)
        interval = self.im_empty._intervals[0]
        self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)])
        self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0})
        self.assertIsInstance(self.im_empty._interval_tree, IntervalTree)

    def test_add_gt_upper_bound(self):
        '''A feature ending past the upper bound is rejected.'''
        with self.assertRaises(ValueError):
            self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound + 1)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_add_eq_start_end_bound(self):
        '''Zero-length bounds (start == end) can be added.'''
        for i in 0, 1, self.upper_bound:
            # test that no exception is raised
            self.im_empty.add(bounds=[(i, i)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_query_attribute(self):
        '''``_query_attribute`` filters features by metadata.'''
        intervals = self.im_2._query_attribute({})
        # NOTE: zip stops at the shorter iterable, so this compares the
        # yielded items pairwise but does not check how many were yielded.
        for i, j in zip(intervals, self.im_2._intervals):
            self.assertEqual(i, j)

        intervals = list(self.im_2._query_attribute(None))
        self.assertEqual(len(intervals), 0)

        for i in self.im_2._intervals:
            intervals = list(self.im_2._query_attribute(i.metadata))
            self.assertEqual(len(intervals), 1)
            self.assertEqual(intervals[0], i)

    def test_query_interval(self):
        '''``_query_interval`` returns the features overlapping a span.'''
        intervals = list(self.im_2._query_interval((1, 2)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

        intervals = list(self.im_2._query_interval((3, 4)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # Compared through repr in a set, so the order is not checked here.
        intervals = {repr(i) for i in self.im_2._query_interval((1, 7))}
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(intervals, {repr(i) for i in self.im_2._intervals})

    def test_query_interval_upper_bound(self):
        '''A span touching the upper bound finds the feature ending there.'''
        intervals = list(
            self.im_2._query_interval(
                (self.upper_bound - 1, self.upper_bound)))
        self.assertEqual(intervals, [self.im_2_1])

    def test_query(self):
        '''``query`` combines the bounds and metadata filters.'''
        intervals = list(
            self.im_2.query(bounds=[(1, 5)], metadata={'gene': 'sagA'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

    def test_query_empty(self):
        '''``query`` without arguments returns nothing.'''
        intervals = list(self.im_1.query())
        self.assertEqual(len(intervals), 0)

    def test_query_no_hits(self):
        '''``query`` returns nothing when no feature matches.'''
        intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)]))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

        intervals = list(
            self.im_2.query(bounds=[(1, 2)], metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

    def test_query_interval_only(self):
        '''``query`` by bounds alone returns each matching feature once.'''
        for loc in [[(1, 7)], [(1, 2), (3, 4)]]:
            intervals = list(self.im_2.query(bounds=loc))
            self.assertEqual(len(intervals), 2)
            self.assertEqual(intervals[0], self.im_2_1)
            self.assertEqual(intervals[1], self.im_2_2)

    def test_query_metadata_only(self):
        '''``query`` by metadata alone returns the matching features.'''
        intervals = list(self.im_2.query(metadata={'gene': 'sagB'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        intervals = list(self.im_2.query(metadata={'bound': 0}))
        self.assertEqual(len(intervals), 2)
        self.assertEqual(intervals[0], self.im_2_1)
        self.assertEqual(intervals[1], self.im_2_2)

    def test_drop(self):
        '''``drop`` removes the given feature and marks it as dropped.'''
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl])
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], self.im_2_2)
        # test the intvl was set to dropped
        self.assertTrue(intvl.dropped)

    def test_drop_all(self):
        '''Dropping every feature leaves an object equal to an empty one.'''
        self.im_2.drop(self.im_2._intervals)
        self.assertEqual(self.im_2, self.im_empty)

    def test_drop_negate(self):
        '''``drop(..., negate=True)`` keeps only the given features.'''
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl], negate=True)
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], intvl)
        # test the dropped intvl was set to dropped
        self.assertTrue(self.im_2_2.dropped)

    def test_reverse(self):
        '''``_reverse`` mirrors every bound around the upper bound.'''
        self.im_2._reverse()
        # The Interval constructor is called for its side effect only: it
        # registers the expected (mirrored) features on im_empty.
        Interval(interval_metadata=self.im_empty,
                 bounds=[(0, 6), (8, 9)],
                 metadata={'gene': 'sagA', 'bound': 0})
        Interval(interval_metadata=self.im_empty,
                 bounds=[(5, 7)],
                 metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})
        self.assertEqual(self.im_2, self.im_empty)

    def test_eq_ne(self):
        '''Equality ignores feature order but not metadata values.'''
        im1 = IntervalMetadata(10)
        im1.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])
        im1.add(metadata={'gene': 'sagB', 'bound': '3'}, bounds=[(3, 5)])

        # The ordering shouldn't matter
        im2 = IntervalMetadata(10)
        im2.add(metadata={'gene': 'sagB', 'bound': '3'}, bounds=[(3, 5)])
        im2.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])

        # Same bounds as im1, but 'sagA' carries a different 'bound' value.
        im3 = IntervalMetadata(10)
        im3.add(metadata={'gene': 'sagA', 'bound': '3'},
                bounds=[(0, 2), (4, 7)])
        im3.add(metadata={'gene': 'sagB', 'bound': '3'}, bounds=[(3, 5)])

        self.assertReallyEqual(im1, im2)
        self.assertReallyNotEqual(im1, im3)

    def test_ne_diff_bounds(self):
        '''Objects with different upper bounds are never equal.'''
        im1 = IntervalMetadata(10)
        im2 = IntervalMetadata(9)
        intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}}
        im1.add(**intvl)
        im2.add(**intvl)
        self.assertReallyNotEqual(im1, im2)

    def test_repr(self):
        '''``repr`` shows a header line, an underline and the features.'''
        def interval_re(start, end, gene):
            # Regex for one Interval repr line. Raw strings keep the regex
            # escapes (\(, \[ ...) without relying on Python passing
            # unrecognised escape sequences through, which is deprecated.
            return (r"Interval\(interval_metadata=<[0-9]+>, "
                    r"bounds=\[\(%d, %d\)\], "
                    r"fuzzy=\[\(False, False\)\], "
                    r"metadata={'gene': '%s'}\)" % (start, end, gene))

        exp = '''0 interval features
-------------------'''
        self.assertEqual(repr(self.im_empty), exp)

        self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'})

        exp = '\n'.join([
            r'1 interval feature',
            r'------------------',
            interval_re(1, 2, 'sagA')])
        self.assertRegex(repr(self.im_empty), exp)

        self.im_empty.add([(3, 4)], metadata={'gene': 'sagB'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagC'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagD'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagE'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagF'})
        # The '...' line is an unescaped regex, so it matches any three
        # characters on the line between the second and second-to-last
        # features.
        exp = '\n'.join([
            r'6 interval features',
            r'-------------------',
            interval_re(1, 2, 'sagA'),
            interval_re(3, 4, 'sagB'),
            r'...',
            interval_re(3, 4, 'sagE'),
            interval_re(3, 4, 'sagF')])
        self.assertRegex(repr(self.im_empty), exp)
Esempio n. 4
0
class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin):
    '''Tests for ``IntervalMetadata`` and its interaction with ``Interval``.'''

    def setUp(self):
        '''Build the fixtures shared by the tests.

        All three objects use ``upper_bound`` 10: ``im_empty`` has no
        features, ``im_1`` has one (``im_1_1``) and ``im_2`` has two
        (``im_2_1`` and ``im_2_2``).
        '''
        self.upper_bound = 10
        self.im_empty = IntervalMetadata(self.upper_bound)
        self.im_1 = IntervalMetadata(self.upper_bound)
        # Constructing an Interval with ``interval_metadata=`` also registers
        # it on that object (test_num_interval_features relies on this).
        self.im_1_1 = Interval(
            interval_metadata=self.im_1,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2 = IntervalMetadata(self.upper_bound)
        self.im_2_1 = Interval(
            interval_metadata=self.im_2,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2_2 = Interval(
            interval_metadata=self.im_2,
            bounds=[(3, 5)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_copy_empty(self):
        '''``copy`` of an empty object is equal but has its own containers.'''
        obs = copy(self.im_empty)
        self.assertEqual(obs, self.im_empty)
        self.assertIsNot(obs._intervals, self.im_empty._intervals)
        self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_copy(self):
        '''``copy`` produces an equal, shallow copy.'''
        obs = copy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for i in range(self.im_2.num_interval_features):
            i1, i2 = obs._intervals[i], self.im_2._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1._interval_metadata, i2._interval_metadata)
            self.assertIsNot(i1.metadata, i2.metadata)
            # Shallow: the values stored in the metadata are the same objects.
            for k in i1.metadata:
                self.assertIs(i1.metadata[k], i2.metadata[k])

    def test_deepcopy(self):
        '''``deepcopy`` produces an equal copy sharing no mutable state.'''
        obs = deepcopy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for i in range(self.im_2.num_interval_features):
            i1, i2 = obs._intervals[i], self.im_2._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1.metadata, i2.metadata)

        # i1/i2 are the last pair visited by the loop above (the 'sagB'
        # feature). Mutating the original's nested list must not leak into
        # the deep copy.
        i2.metadata['spam'].append(1)
        self.assertEqual(i2.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]})
        self.assertEqual(i1.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_deepcopy_memo_is_respected(self):
        '''``deepcopy`` records the objects it copied in the given memo.'''
        memo = {}
        deepcopy(self.im_1, memo)
        self.assertGreater(len(memo), 2)

    def test_init(self):
        '''A new object has no intervals and a tree that is not stale.'''
        self.assertFalse(self.im_empty._is_stale_tree)
        self.assertEqual(self.im_empty._intervals, [])

    def test_init_upper_bound_lt_lower_bound(self):
        '''An upper bound of 0 is accepted; a negative one is rejected.'''
        # Report an unexpected ValueError as a test failure, not an error.
        try:
            IntervalMetadata(0)
        except ValueError:
            self.fail('`IntervalMetadata` raised ValueError unexpectedly')
        with self.assertRaises(ValueError):
            IntervalMetadata(-1)

    def test_num_interval_features(self):
        '''``num_interval_features`` reports the number of features.'''
        self.assertEqual(self.im_empty.num_interval_features, 0)
        self.assertEqual(self.im_1.num_interval_features, 1)
        self.assertEqual(self.im_2.num_interval_features, 2)

    def test_duplicate(self):
        '''Test query and drop methods on duplicate Intervals.'''
        intvl_1 = self.im_empty.add([(1, 2)])
        intvl_2 = self.im_empty.add([(1, 2)])
        self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2)
        self.im_empty.drop([intvl_1])
        self.assertEqual(len(self.im_empty._intervals), 1)
        # Only the exact object passed to drop() is removed.
        self.assertIs(self.im_empty._intervals[0], intvl_2)

    def test_duplicate_bounds(self):
        '''A feature with repeated bounds is returned once by ``query``.'''
        intvl = self.im_empty.add([(1, 2), (1, 2)])
        intvls = list(self.im_empty.query([(1, 2)]))
        self.assertEqual(len(intvls), 1)
        self.assertIs(intvl, intvls[0])

    def test_concat_empty(self):
        '''Concatenating empty objects sums their upper bounds.'''
        for i in 0, 1, 2:
            obs = IntervalMetadata.concat([self.im_empty] * i)
            exp = IntervalMetadata(self.upper_bound * i)
            self.assertEqual(obs, exp)

        obs = IntervalMetadata.concat([])
        self.assertEqual(obs, IntervalMetadata(0))

    def test_concat(self):
        '''Concatenation shifts each object's bounds by the upper bounds of
        the objects preceding it.'''
        im1 = IntervalMetadata(3)
        im2 = IntervalMetadata(4)
        im3 = IntervalMetadata(5)
        im1.add([(0, 2)], [(True, True)])
        im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'})
        im2.add([(2, 4)], metadata={'gene': 'sagB'})
        im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'})
        obs = IntervalMetadata.concat([im1, im2, im3])

        exp = IntervalMetadata(12)
        exp.add(bounds=[(0, 2)], fuzzy=[(True, True)])
        exp.add(bounds=[(3, 6)], fuzzy=[(True, False)],
                metadata={'gene': 'sagA'})
        exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'})
        exp.add(bounds=[(8, 12)], fuzzy=[(False, True)],
                metadata={'gene': 'sagC'})
        self.assertEqual(obs, exp)

    def test_sort(self):
        '''``sort`` reorders the features without changing equality.'''
        # Adds a third feature to im_2 so the ordering is non-trivial.
        interval = Interval(
            self.im_2,
            [(1, 2), (3, 8)],
            metadata={'gene': 'sagA', 'bound': 0})
        im = deepcopy(self.im_2)
        self.im_2.sort(False)
        # check sorting does not have other side effects
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [self.im_2_2, self.im_2_1, interval])

        self.im_2.sort()
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [interval, self.im_2_1, self.im_2_2])

        self.im_empty.sort()
        self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound))

    def test_add_eq_upper_bound(self):
        '''A feature ending exactly at the upper bound can be added.'''
        self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)],
                          metadata={'gene': 'sagA', 'bound': 0})
        self.assertTrue(self.im_empty._is_stale_tree)
        interval = self.im_empty._intervals[0]
        self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)])
        self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0})
        self.assertIsInstance(self.im_empty._interval_tree, IntervalTree)

    def test_add_gt_upper_bound(self):
        '''A feature ending past the upper bound is rejected.'''
        with self.assertRaises(ValueError):
            self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound + 1)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_add_eq_start_end_bound(self):
        '''Zero-length bounds (start == end) are rejected, and the error
        message mentions the offending coordinate twice.'''
        for i in 0, 1, self.upper_bound:
            with self.assertRaisesRegex(ValueError, '{i}.*{i}'.format(i=i)):
                self.im_empty.add(bounds=[(i, i)],
                                  metadata={'gene': 'sagA', 'bound': 0})

    def test_query_attribute(self):
        '''``_query_attribute`` filters features by metadata.'''
        intervals = self.im_2._query_attribute({})
        # NOTE: zip stops at the shorter iterable, so this compares the
        # yielded items pairwise but does not check how many were yielded.
        for i, j in zip(intervals, self.im_2._intervals):
            self.assertEqual(i, j)

        intervals = list(self.im_2._query_attribute(None))
        self.assertEqual(len(intervals), 0)

        for i in self.im_2._intervals:
            intervals = list(self.im_2._query_attribute(i.metadata))
            self.assertEqual(len(intervals), 1)
            self.assertEqual(intervals[0], i)

    def test_query_interval(self):
        '''``_query_interval`` returns the features overlapping a span.'''
        intervals = list(self.im_2._query_interval((1, 2)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

        intervals = list(self.im_2._query_interval((3, 4)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # Compared through repr in a set, so the order is not checked here.
        intervals = {repr(i) for i in self.im_2._query_interval((1, 7))}
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(intervals,
                            {repr(i) for i in self.im_2._intervals})

    def test_query_interval_upper_bound(self):
        '''A span touching the upper bound finds the feature ending there.'''
        intervals = list(self.im_2._query_interval((self.upper_bound - 1,
                                                    self.upper_bound)))
        self.assertEqual(intervals, [self.im_2_1])

    def test_query(self):
        '''``query`` combines the bounds and metadata filters.'''
        intervals = list(self.im_2.query(bounds=[(1, 5)],
                                         metadata={'gene': 'sagA'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

    def test_query_empty(self):
        '''``query`` without arguments returns nothing.'''
        intervals = list(self.im_1.query())
        self.assertEqual(len(intervals), 0)

    def test_query_no_hits(self):
        '''``query`` returns nothing when no feature matches.'''
        intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)]))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(bounds=[(1, 2)],
                                         metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

    def test_query_interval_only(self):
        '''``query`` by bounds alone returns each matching feature once.'''
        for loc in [[(1, 7)],
                    [(1, 2), (3, 4)]]:
            intervals = list(self.im_2.query(bounds=loc))
            self.assertEqual(len(intervals), 2)
            self.assertEqual(intervals[0], self.im_2_1)
            self.assertEqual(intervals[1], self.im_2_2)

    def test_query_metadata_only(self):
        '''``query`` by metadata alone returns the matching features.'''
        intervals = list(self.im_2.query(metadata={'gene': 'sagB'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        intervals = list(self.im_2.query(metadata={'bound': 0}))
        self.assertEqual(len(intervals), 2)
        self.assertEqual(intervals[0], self.im_2_1)
        self.assertEqual(intervals[1], self.im_2_2)

    def test_drop(self):
        '''``drop`` removes the given feature and marks it as dropped.'''
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl])
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], self.im_2_2)
        # test the intvl was set to dropped
        self.assertTrue(intvl.dropped)

    def test_drop_all(self):
        '''Dropping every feature leaves an object equal to an empty one.'''
        self.im_2.drop(self.im_2._intervals)
        self.assertEqual(self.im_2, self.im_empty)

    def test_reverse(self):
        '''``_reverse`` mirrors every bound around the upper bound.'''
        self.im_2._reverse()
        # The Interval constructor is called for its side effect only: it
        # registers the expected (mirrored) features on im_empty.
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(0, 6), (8, 9)],
            metadata={'gene': 'sagA', 'bound': 0})
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(5, 7)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})
        self.assertEqual(self.im_2, self.im_empty)

    def test_eq_ne(self):
        '''Equality ignores feature order but not metadata values.'''
        im1 = IntervalMetadata(10)
        im1.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])
        im1.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        # The ordering shouldn't matter
        im2 = IntervalMetadata(10)
        im2.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])
        im2.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])

        # Same bounds as im1, but 'sagA' carries a different 'bound' value.
        im3 = IntervalMetadata(10)
        im3.add(metadata={'gene': 'sagA', 'bound': '3'},
                bounds=[(0, 2), (4, 7)])
        im3.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        self.assertReallyEqual(im1, im2)
        self.assertReallyNotEqual(im1, im3)

    def test_ne_diff_bounds(self):
        '''Objects with different upper bounds are never equal.'''
        im1 = IntervalMetadata(10)
        im2 = IntervalMetadata(9)
        intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}}
        im1.add(**intvl)
        im2.add(**intvl)
        self.assertReallyNotEqual(im1, im2)

    def test_repr(self):
        '''``repr`` shows a header line, an underline and the features.'''
        def interval_re(start, end, gene):
            # Regex for one Interval repr line. Raw strings keep the regex
            # escapes (\(, \[ ...) without relying on Python passing
            # unrecognised escape sequences through, which is deprecated.
            return (r"Interval\(interval_metadata=<[0-9]+>, "
                    r"bounds=\[\(%d, %d\)\], "
                    r"fuzzy=\[\(False, False\)\], "
                    r"metadata={'gene': '%s'}\)" % (start, end, gene))

        exp = '''0 interval features
-------------------'''
        self.assertEqual(repr(self.im_empty), exp)

        self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'})

        exp = '\n'.join([
            r'1 interval feature',
            r'------------------',
            interval_re(1, 2, 'sagA')])
        self.assertRegex(repr(self.im_empty), exp)

        self.im_empty.add([(3, 4)], metadata={'gene': 'sagB'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagC'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagD'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagE'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagF'})
        # The '...' line is an unescaped regex, so it matches any three
        # characters on the line between the second and second-to-last
        # features.
        exp = '\n'.join([
            r'6 interval features',
            r'-------------------',
            interval_re(1, 2, 'sagA'),
            interval_re(3, 4, 'sagB'),
            r'...',
            interval_re(3, 4, 'sagE'),
            interval_re(3, 4, 'sagF')])
        self.assertRegex(repr(self.im_empty), exp)
Esempio n. 5
0
class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin):
    """Tests for ``IntervalMetadata`` driven by three fixtures from ``setUp``.

    ``im_empty`` holds no features, ``im_1`` holds one and ``im_2`` holds
    two; all three are created with ``self.upper_bound``.
    """

    def setUp(self):
        # Constructing an ``Interval`` with ``interval_metadata=...`` is what
        # fills the containers: the tests below rely on ``im_1`` holding one
        # feature and ``im_2`` holding two (see
        # ``test_num_interval_features``).
        self.upper_bound = 10
        self.im_empty = IntervalMetadata(self.upper_bound)
        self.im_1 = IntervalMetadata(self.upper_bound)
        self.im_1_1 = Interval(
            interval_metadata=self.im_1,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2 = IntervalMetadata(self.upper_bound)
        self.im_2_1 = Interval(
            interval_metadata=self.im_2,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2_2 = Interval(
            interval_metadata=self.im_2,
            bounds=[(3, 5)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_copy_empty(self):
        # A copy of an empty container compares equal to the original but
        # must own a separate interval list and interval tree.
        obs = copy(self.im_empty)
        self.assertEqual(obs, self.im_empty)
        self.assertIsNot(obs._intervals, self.im_empty._intervals)
        self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_copy(self):
        # copy() is expected to be shallow: every container-level and
        # interval-level object is new, while the individual metadata
        # values are still the very same objects.
        obs = copy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for i in range(self.im_2.num_interval_features):
            i1, i2 = obs._intervals[i], self.im_2._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1._interval_metadata, i2._interval_metadata)
            self.assertIsNot(i1.metadata, i2.metadata)
            for k in i1.metadata:
                self.assertIs(i1.metadata[k], i2.metadata[k])

    def test_deepcopy(self):
        # deepcopy() must not share any mutable state with the original.
        obs = deepcopy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for i in range(self.im_2.num_interval_features):
            i1, i2 = obs._intervals[i], self.im_2._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1.metadata, i2.metadata)

        # ``i1``/``i2`` still refer to the last pair visited by the loop
        # (the 'sagB' feature).  Mutating the nested list of the original
        # must leave the deep copy untouched.
        i2.metadata['spam'].append(1)
        self.assertEqual(i2.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]})
        self.assertEqual(i1.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_deepcopy_memo_is_respected(self):
        # The caller-supplied memo dict must be filled in by deepcopy(),
        # i.e. it has to be passed on to the nested copies.
        memo = {}
        deepcopy(self.im_1, memo)
        self.assertGreater(len(memo), 2)

    def test_init(self):
        # A fresh container has a non-stale tree and no intervals.
        self.assertFalse(self.im_empty._is_stale_tree)
        self.assertEqual(self.im_empty._intervals, [])

    def test_init_upper_bound_lt_lower_bound(self):
        # test that no exception is raised
        IntervalMetadata(0)

        # A negative upper bound is rejected.
        with self.assertRaises(ValueError):
            IntervalMetadata(-1)

    def test_num_interval_features(self):
        self.assertEqual(self.im_empty.num_interval_features, 0)
        self.assertEqual(self.im_1.num_interval_features, 1)
        self.assertEqual(self.im_2.num_interval_features, 2)

    def test_duplicate(self):
        '''Test query and drop methods on duplicate Intervals.'''
        intvl_1 = self.im_empty.add([(1, 2)])
        intvl_2 = self.im_empty.add([(1, 2)])
        self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2)
        # Dropping one of two identical features must remove exactly that
        # object and keep the other one.
        self.im_empty.drop([intvl_1])
        self.assertEqual(len(self.im_empty._intervals), 1)
        self.assertIs(self.im_empty._intervals[0], intvl_2)

    def test_duplicate_bounds(self):
        # A feature whose bounds repeat the same span is reported only once
        # by a query that overlaps that span.
        intvl = self.im_empty.add([(1, 2), (1, 2)])
        intvls = list(self.im_empty.query([(1, 2)]))
        self.assertEqual(len(intvls), 1)
        self.assertIs(intvl, intvls[0])

    def test_concat_empty(self):
        # Concatenating i empty containers gives an empty container whose
        # upper bound is i times the individual upper bound.
        for i in 0, 1, 2:
            obs = IntervalMetadata.concat([self.im_empty] * i)
            exp = IntervalMetadata(self.upper_bound * i)
            self.assertEqual(obs, exp)

        obs = IntervalMetadata.concat([])
        self.assertEqual(obs, IntervalMetadata(0))

    def test_concat(self):
        im1 = IntervalMetadata(3)
        im2 = IntervalMetadata(4)
        im3 = IntervalMetadata(5)
        im1.add([(0, 2)], [(True, True)])
        im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'})
        im2.add([(2, 4)], metadata={'gene': 'sagB'})
        im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'})
        obs = IntervalMetadata.concat([im1, im2, im3])

        # Expected: upper bounds add up (3 + 4 + 5) and the coordinates of
        # each feature are shifted by the total length of the containers
        # that precede it (0, 3 and 7 respectively).
        exp = IntervalMetadata(12)
        exp.add(bounds=[(0, 2)], fuzzy=[(True, True)])
        exp.add(bounds=[(3, 6)], fuzzy=[(True, False)],
                metadata={'gene': 'sagA'})
        exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'})
        exp.add(bounds=[(8, 12)], fuzzy=[(False, True)],
                metadata={'gene': 'sagC'})
        self.assertEqual(obs, exp)

    def test_sort(self):
        # Constructing this Interval attaches a third feature to ``im_2``.
        interval = Interval(
            self.im_2,
            [(1, 2), (3, 8)],
            metadata={'gene': 'sagA', 'bound': 0})
        im = deepcopy(self.im_2)
        self.im_2.sort(False)
        # check sorting does not have other side effects
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [self.im_2_2, self.im_2_1, interval])

        # The default sort() gives the reverse of the order checked above.
        self.im_2.sort()
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [interval, self.im_2_1, self.im_2_2])

        # Sorting an empty container is a no-op.
        self.im_empty.sort()
        self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound))

    def test_add_eq_upper_bound(self):
        # A feature that ends exactly at the upper bound is accepted, and
        # adding it is expected to flag the interval tree as stale.
        self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)],
                          metadata={'gene': 'sagA', 'bound': 0})
        self.assertTrue(self.im_empty._is_stale_tree)
        interval = self.im_empty._intervals[0]
        self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)])
        self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0})
        self.assertIsInstance(self.im_empty._interval_tree, IntervalTree)

    def test_add_gt_upper_bound(self):
        # A feature that extends past the upper bound is rejected.
        with self.assertRaises(ValueError):
            self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound + 1)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_add_eq_start_end_bound(self):
        for i in 0, 1, self.upper_bound:
            # test that no exception is raised
            self.im_empty.add(bounds=[(i, i)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_query_attribute(self):
        # An empty query dict is expected to match every feature, in order.
        # The result is materialised and compared as a whole list: zipping
        # it against ``_intervals`` would pass vacuously if the query
        # returned nothing at all.
        intervals = list(self.im_2._query_attribute({}))
        self.assertEqual(intervals, self.im_2._intervals)

        intervals = list(self.im_2._query_attribute(None))
        self.assertEqual(len(intervals), 0)

        # Querying with a feature's complete metadata finds exactly it.
        for i in self.im_2._intervals:
            intervals = list(self.im_2._query_attribute(i.metadata))
            self.assertEqual(len(intervals), 1)
            self.assertEqual(intervals[0], i)

    def test_query_interval(self):
        intervals = list(self.im_2._query_interval((1, 2)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

        intervals = list(self.im_2._query_interval((3, 4)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # A span covering both features returns both; reprs are compared
        # as sets so the order of the hits does not matter.
        intervals = {repr(i) for i in self.im_2._query_interval((1, 7))}
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(intervals,
                            {repr(i) for i in self.im_2._intervals})

    def test_query_interval_upper_bound(self):
        # The last unit before the upper bound is still covered by the
        # feature whose bounds end at the upper bound.
        intervals = list(self.im_2._query_interval((self.upper_bound - 1,
                                                    self.upper_bound)))
        self.assertEqual(intervals, [self.im_2_1])

    def test_query(self):
        # Both features overlap (1, 5); the metadata filter narrows the
        # result down to the 'sagA' feature.
        intervals = list(self.im_2.query(bounds=[(1, 5)],
                                         metadata={'gene': 'sagA'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

    def test_query_empty(self):
        # query() without any criteria returns nothing, even though the
        # container itself holds a feature.
        intervals = list(self.im_1.query())
        self.assertEqual(len(intervals), 0)

    def test_query_no_hits(self):
        # Bounds starting at the upper bound overlap nothing.
        intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)]))
        self.assertEqual(len(intervals), 0)

        # No feature carries this metadata value.
        intervals = list(self.im_2.query(metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

        # Matching bounds do not help when the metadata does not match.
        intervals = list(self.im_2.query(bounds=[(1, 2)],
                                         metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

    def test_query_interval_only(self):
        # One wide span and two narrow spans must produce the same hits.
        for loc in [[(1, 7)],
                    [(1, 2), (3, 4)]]:
            intervals = list(self.im_2.query(bounds=loc))
            self.assertEqual(len(intervals), 2)
            self.assertEqual(intervals[0], self.im_2_1)
            self.assertEqual(intervals[1], self.im_2_2)

    def test_query_metadata_only(self):
        intervals = list(self.im_2.query(metadata={'gene': 'sagB'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # 'bound': 0 is shared by both features.
        intervals = list(self.im_2.query(metadata={'bound': 0}))
        self.assertEqual(len(intervals), 2)
        self.assertEqual(intervals[0], self.im_2_1)
        self.assertEqual(intervals[1], self.im_2_2)

    def test_drop(self):
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl])
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], self.im_2_2)
        # test the intvl was set to dropped
        self.assertTrue(intvl.dropped)

    def test_drop_all(self):
        # Dropping every feature leaves a container equal to an empty one.
        self.im_2.drop(self.im_2._intervals)
        self.assertEqual(self.im_2, self.im_empty)

    def test_reverse(self):
        self.im_2._reverse()
        # The two Interval objects are created only for their side effect:
        # they attach the expected post-reversal features to ``im_empty``,
        # which then serves as the expected container.
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(0, 6), (8, 9)],
            metadata={'gene': 'sagA', 'bound': 0})
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(5, 7)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})
        self.assertEqual(self.im_2, self.im_empty)

    def test_eq_ne(self):
        im1 = IntervalMetadata(10)
        im1.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])
        im1.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        # The ordering shouldn't matter
        im2 = IntervalMetadata(10)
        im2.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])
        im2.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])

        # Same bounds as im1, but one metadata value differs.
        im3 = IntervalMetadata(10)
        im3.add(metadata={'gene': 'sagA', 'bound': '3'},
                bounds=[(0, 2), (4, 7)])
        im3.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        self.assertReallyEqual(im1, im2)
        self.assertReallyNotEqual(im1, im3)

    def test_ne_diff_bounds(self):
        # Same feature, different upper bounds: must not compare equal.
        im1 = IntervalMetadata(10)
        im2 = IntervalMetadata(9)
        intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}}
        im1.add(**intvl)
        im2.add(**intvl)
        self.assertReallyNotEqual(im1, im2)

    def test_repr(self):
        # Checks repr() of the container for zero, one and six features.
        # With six features the expected text shows the first two and the
        # last two features separated by a "..." line.
        exp = '''0 interval features
-------------------'''
        self.assertEqual(repr(self.im_empty), exp)

        self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'})

        # The patterns are assembled from raw-string pieces: regex escapes
        # such as ``\(`` are not valid escapes in an ordinary string literal
        # and make newer Python versions emit a warning.  ``<[0-9]+>`` stands
        # in for the numeric identifier printed for the owning container.
        exp = ("1 interval feature\n"
               "------------------\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagA'}\)")
        self.assertRegex(repr(self.im_empty), exp)

        for gene in ('sagB', 'sagC', 'sagD', 'sagE', 'sagF'):
            self.im_empty.add([(3, 4)], metadata={'gene': gene})
        exp = ("6 interval features\n"
               "-------------------\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagA'}\)\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagB'}\)\n"
               r"...\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagE'}\)\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagF'}\)")
        self.assertRegex(repr(self.im_empty), exp)
Esempio n. 6
0
class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin):
    """Tests for ``IntervalMetadata`` driven by three fixtures from ``setUp``.

    ``im_empty`` holds no features, ``im_1`` holds one and ``im_2`` holds
    two; all three are created with ``self.upper_bound``.
    """

    def setUp(self):
        # Constructing an ``Interval`` with ``interval_metadata=...`` is what
        # fills the containers: the tests below rely on ``im_1`` holding one
        # feature and ``im_2`` holding two (see
        # ``test_num_interval_features``).
        self.upper_bound = 10
        self.im_empty = IntervalMetadata(self.upper_bound)
        self.im_1 = IntervalMetadata(self.upper_bound)
        self.im_1_1 = Interval(
            interval_metadata=self.im_1,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2 = IntervalMetadata(self.upper_bound)
        self.im_2_1 = Interval(
            interval_metadata=self.im_2,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2_2 = Interval(
            interval_metadata=self.im_2,
            bounds=[(3, 5)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_copy_empty(self):
        # A copy of an empty container compares equal to the original but
        # must own a separate interval list and interval tree.
        obs = copy(self.im_empty)
        self.assertEqual(obs, self.im_empty)
        self.assertIsNot(obs._intervals, self.im_empty._intervals)
        self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_copy(self):
        # copy() is expected to be shallow: every container-level and
        # interval-level object is new, while the individual metadata
        # values are still the very same objects.
        obs = copy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for i in range(self.im_2.num_interval_features):
            i1, i2 = obs._intervals[i], self.im_2._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1._interval_metadata, i2._interval_metadata)
            self.assertIsNot(i1.metadata, i2.metadata)
            for k in i1.metadata:
                self.assertIs(i1.metadata[k], i2.metadata[k])

    def test_deepcopy(self):
        # deepcopy() must not share any mutable state with the original.
        obs = deepcopy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for i in range(self.im_2.num_interval_features):
            i1, i2 = obs._intervals[i], self.im_2._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1.metadata, i2.metadata)

        # ``i1``/``i2`` still refer to the last pair visited by the loop
        # (the 'sagB' feature).  Mutating the nested list of the original
        # must leave the deep copy untouched.
        i2.metadata['spam'].append(1)
        self.assertEqual(i2.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]})
        self.assertEqual(i1.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_deepcopy_memo_is_respected(self):
        # The caller-supplied memo dict must be filled in by deepcopy(),
        # i.e. it has to be passed on to the nested copies.
        memo = {}
        deepcopy(self.im_1, memo)
        self.assertGreater(len(memo), 2)

    def test_init(self):
        # A fresh container has a non-stale tree and no intervals.
        self.assertFalse(self.im_empty._is_stale_tree)
        self.assertEqual(self.im_empty._intervals, [])

    def test_init_upper_bound_lt_lower_bound(self):
        # test that no exception is raised
        IntervalMetadata(0)

        # A negative upper bound is rejected.
        with self.assertRaises(ValueError):
            IntervalMetadata(-1)

    def test_upper_bound_is_none(self):
        # With an upper bound of None arbitrarily large coordinates are
        # accepted, but operations that need a concrete upper bound
        # (reversing, concatenating) raise TypeError.
        im = IntervalMetadata(None)
        # should not raise error
        im.add([(0, 1000000000)])
        self.assertIsNone(im.upper_bound)
        with self.assertRaisesRegex(
                TypeError, r'upper bound is `None`'):
            im._reverse()
        with self.assertRaisesRegex(
                TypeError, r'upper bound is `None`'):
            IntervalMetadata.concat([self.im_1, im])

    def test_init_copy_from(self):
        # Passing an existing container as second constructor argument
        # copies its features into the new container, for an unbounded as
        # well as for larger upper bounds.
        for i in [None, 99, 999]:
            obs = IntervalMetadata(i, self.im_1)
            exp = IntervalMetadata(i)
            exp.add(bounds=[(1, 2), (4, self.upper_bound)],
                    metadata={'gene': 'sagA', 'bound': 0})
            self.assertEqual(obs, exp)

    def test_init_copy_from_empty(self):
        # Copying from an empty container works for any upper bound.
        for i in [None, 0, 9, 99, 999]:
            obs = IntervalMetadata(i, self.im_empty)
            exp = IntervalMetadata(i)
            self.assertEqual(obs, exp)
            # test it is shallow copy
            self.assertIsNot(obs._intervals, self.im_empty._intervals)
            self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_init_copy_from_shallow_copy(self):
        obs = IntervalMetadata(self.upper_bound, self.im_2)
        self.assertEqual(self.im_2, obs)
        # test it is shallow copy
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)
        for i in range(self.im_2.num_interval_features):
            i1, i2 = obs._intervals[i], self.im_2._intervals[i]
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1._interval_metadata, i2._interval_metadata)
            self.assertIsNot(i1.metadata, i2.metadata)
            # Metadata values themselves are shared, not copied.
            for k in i1.metadata:
                self.assertIs(i1.metadata[k], i2.metadata[k])

    def test_init_copy_from_error(self):
        # ``im_2`` has a feature ending at ``upper_bound``, so copying it
        # into a container that is one unit shorter must fail.
        i = self.upper_bound - 1
        with self.assertRaisesRegex(
                ValueError, r'larger than upper bound \(%r\)' % i):
            IntervalMetadata(i, self.im_2)

    def test_num_interval_features(self):
        self.assertEqual(self.im_empty.num_interval_features, 0)
        self.assertEqual(self.im_1.num_interval_features, 1)
        self.assertEqual(self.im_2.num_interval_features, 2)

    def test_duplicate(self):
        '''Test query and drop methods on duplicate Intervals.'''
        intvl_1 = self.im_empty.add([(1, 2)])
        intvl_2 = self.im_empty.add([(1, 2)])
        self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2)
        # Dropping one of two identical features must remove exactly that
        # object and keep the other one.
        self.im_empty.drop([intvl_1])
        self.assertEqual(len(self.im_empty._intervals), 1)
        self.assertIs(self.im_empty._intervals[0], intvl_2)

    def test_duplicate_bounds(self):
        # A feature whose bounds repeat the same span is reported only once
        # by a query that overlaps that span.
        intvl = self.im_empty.add([(1, 2), (1, 2)])
        intvls = list(self.im_empty.query([(1, 2)]))
        self.assertEqual(len(intvls), 1)
        self.assertIs(intvl, intvls[0])

    def test_concat_empty(self):
        # Concatenating i empty containers gives an empty container whose
        # upper bound is i times the individual upper bound.
        for i in 0, 1, 2:
            obs = IntervalMetadata.concat([self.im_empty] * i)
            exp = IntervalMetadata(self.upper_bound * i)
            self.assertEqual(obs, exp)

        obs = IntervalMetadata.concat([])
        self.assertEqual(obs, IntervalMetadata(0))

    def test_concat(self):
        im1 = IntervalMetadata(3)
        im2 = IntervalMetadata(4)
        im3 = IntervalMetadata(5)
        im1.add([(0, 2)], [(True, True)])
        im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'})
        im2.add([(2, 4)], metadata={'gene': 'sagB'})
        im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'})
        obs = IntervalMetadata.concat([im1, im2, im3])

        # Expected: upper bounds add up (3 + 4 + 5) and the coordinates of
        # each feature are shifted by the total length of the containers
        # that precede it (0, 3 and 7 respectively).
        exp = IntervalMetadata(12)
        exp.add(bounds=[(0, 2)], fuzzy=[(True, True)])
        exp.add(bounds=[(3, 6)], fuzzy=[(True, False)],
                metadata={'gene': 'sagA'})
        exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'})
        exp.add(bounds=[(8, 12)], fuzzy=[(False, True)],
                metadata={'gene': 'sagC'})
        self.assertEqual(obs, exp)

    def test_merge(self):
        # empty + empty
        im = IntervalMetadata(self.upper_bound)
        self.im_empty.merge(im)
        self.assertEqual(self.im_empty, im)
        # empty + non-empty
        self.im_empty.merge(self.im_1)
        self.assertEqual(self.im_empty, self.im_1)
        # non-empty + non-empty
        # Both sides end up holding the features of im_1 and im_2, merged
        # in a different order, and are expected to compare equal.
        self.im_empty.merge(self.im_2)
        self.im_2.merge(self.im_1)
        self.assertEqual(self.im_empty, self.im_2)

    def test_merge_unequal_upper_bounds(self):
        # Merging a container with a different (non-None) upper bound is
        # rejected, whether or not the receiving container has features.
        n = 3
        im1 = IntervalMetadata(n)
        for im in [self.im_empty, self.im_1]:
            with self.assertRaisesRegex(
                    ValueError,
                    r'not equal \(%d != %d\)' % (self.upper_bound, n)):
                im.merge(im1)

    def test_merge_to_unbounded(self):
        # An unbounded container accepts bounded and unbounded containers
        # alike and stays unbounded afterwards.
        for im in [self.im_empty, self.im_1, IntervalMetadata(None)]:
            obs = IntervalMetadata(None)
            obs.merge(im)
            self.assertIsNone(obs.upper_bound)
            self.assertEqual(obs._intervals, im._intervals)

    def test_merge_unbounded_to_bounded(self):
        im = IntervalMetadata(None)
        with self.assertRaisesRegex(
                ValueError,
                r'Cannot merge an unbound IntervalMetadata object '
                'to a bounded one'):
            self.im_1.merge(im)
        # original im is not changed
        self.assertIsNone(im.upper_bound)
        self.assertEqual(im._intervals, [])

    def test_sort(self):
        # Constructing this Interval attaches a third feature to ``im_2``.
        interval = Interval(
            self.im_2,
            [(1, 2), (3, 8)],
            metadata={'gene': 'sagA', 'bound': 0})
        im = deepcopy(self.im_2)
        self.im_2.sort(False)
        # check sorting does not have other side effects
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [self.im_2_2, self.im_2_1, interval])

        # The default sort() gives the reverse of the order checked above.
        self.im_2.sort()
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [interval, self.im_2_1, self.im_2_2])

        # Sorting an empty container is a no-op.
        self.im_empty.sort()
        self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound))

    def test_add_eq_upper_bound(self):
        # A feature that ends exactly at the upper bound is accepted, and
        # adding it is expected to flag the interval tree as stale.
        self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)],
                          metadata={'gene': 'sagA', 'bound': 0})
        self.assertTrue(self.im_empty._is_stale_tree)
        interval = self.im_empty._intervals[0]
        self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)])
        self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0})
        self.assertIsInstance(self.im_empty._interval_tree, IntervalTree)

    def test_add_gt_upper_bound(self):
        # A feature that extends past the upper bound is rejected.
        with self.assertRaises(ValueError):
            self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound + 1)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_add_eq_start_end_bound(self):
        for i in 0, 1, self.upper_bound:
            # test that no exception is raised
            self.im_empty.add(bounds=[(i, i)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_query_attribute(self):
        # An empty query dict is expected to match every feature, in order.
        # The result is materialised and compared as a whole list: zipping
        # it against ``_intervals`` would pass vacuously if the query
        # returned nothing at all.
        intervals = list(self.im_2._query_attribute({}))
        self.assertEqual(intervals, self.im_2._intervals)

        intervals = list(self.im_2._query_attribute(None))
        self.assertEqual(len(intervals), 0)

        # Querying with a feature's complete metadata finds exactly it.
        for i in self.im_2._intervals:
            intervals = list(self.im_2._query_attribute(i.metadata))
            self.assertEqual(len(intervals), 1)
            self.assertEqual(intervals[0], i)

    def test_query_interval(self):
        intervals = list(self.im_2._query_interval((1, 2)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

        intervals = list(self.im_2._query_interval((3, 4)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # A span covering both features returns both; reprs are compared
        # as sets so the order of the hits does not matter.
        intervals = {repr(i) for i in self.im_2._query_interval((1, 7))}
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(intervals,
                            {repr(i) for i in self.im_2._intervals})

    def test_query_interval_upper_bound(self):
        # The last unit before the upper bound is still covered by the
        # feature whose bounds end at the upper bound.
        intervals = list(self.im_2._query_interval((self.upper_bound - 1,
                                                    self.upper_bound)))
        self.assertEqual(intervals, [self.im_2_1])

    def test_query(self):
        # Both features overlap (1, 5); the metadata filter narrows the
        # result down to the 'sagA' feature.
        intervals = list(self.im_2.query(bounds=[(1, 5)],
                                         metadata={'gene': 'sagA'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

    def test_query_empty(self):
        # query() without any criteria returns nothing, even though the
        # container itself holds a feature.
        intervals = list(self.im_1.query())
        self.assertEqual(len(intervals), 0)

    def test_query_no_hits(self):
        # Bounds starting at the upper bound overlap nothing.
        intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)]))
        self.assertEqual(len(intervals), 0)

        # No feature carries this metadata value.
        intervals = list(self.im_2.query(metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

        # Matching bounds do not help when the metadata does not match.
        intervals = list(self.im_2.query(bounds=[(1, 2)],
                                         metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

    def test_query_interval_only(self):
        # One wide span and two narrow spans must produce the same hits.
        for loc in [[(1, 7)],
                    [(1, 2), (3, 4)]]:
            intervals = list(self.im_2.query(bounds=loc))
            self.assertEqual(len(intervals), 2)
            self.assertEqual(intervals[0], self.im_2_1)
            self.assertEqual(intervals[1], self.im_2_2)

    def test_query_metadata_only(self):
        intervals = list(self.im_2.query(metadata={'gene': 'sagB'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # 'bound': 0 is shared by both features.
        intervals = list(self.im_2.query(metadata={'bound': 0}))
        self.assertEqual(len(intervals), 2)
        self.assertEqual(intervals[0], self.im_2_1)
        self.assertEqual(intervals[1], self.im_2_2)

    def test_drop(self):
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl])
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], self.im_2_2)
        # test the intvl was set to dropped
        self.assertTrue(intvl.dropped)

    def test_drop_all(self):
        # Dropping every feature leaves a container equal to an empty one.
        self.im_2.drop(self.im_2._intervals)
        self.assertEqual(self.im_2, self.im_empty)

    def test_drop_negate(self):
        # With negate=True the listed feature is the one that is kept and
        # every other feature is dropped.
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl], negate=True)
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], intvl)
        # test the dropped intvl was set to dropped
        self.assertTrue(self.im_2_2.dropped)

    def test_reverse(self):
        self.im_2._reverse()
        # The two Interval objects are created only for their side effect:
        # they attach the expected post-reversal features to ``im_empty``,
        # which then serves as the expected container.
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(0, 6), (8, 9)],
            metadata={'gene': 'sagA', 'bound': 0})
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(5, 7)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})
        self.assertEqual(self.im_2, self.im_empty)

    def test_eq_ne(self):
        im1 = IntervalMetadata(10)
        im1.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])
        im1.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        # The ordering shouldn't matter
        im2 = IntervalMetadata(10)
        im2.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])
        im2.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])

        # Same bounds as im1, but one metadata value differs.
        im3 = IntervalMetadata(10)
        im3.add(metadata={'gene': 'sagA', 'bound': '3'},
                bounds=[(0, 2), (4, 7)])
        im3.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        self.assertReallyEqual(im1, im2)
        self.assertReallyNotEqual(im1, im3)

    def test_ne_diff_bounds(self):
        # Same feature, different upper bounds: must not compare equal.
        im1 = IntervalMetadata(10)
        im2 = IntervalMetadata(9)
        intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}}
        im1.add(**intvl)
        im2.add(**intvl)
        self.assertReallyNotEqual(im1, im2)

    def test_repr(self):
        # Checks repr() of the container for zero, one and six features.
        # With six features the expected text shows the first two and the
        # last two features separated by a "..." line.
        exp = '''0 interval features
-------------------'''
        self.assertEqual(repr(self.im_empty), exp)

        self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'})

        # Regex pieces are raw strings so that ``\(`` and friends reach the
        # regex engine untouched; ``<[0-9]+>`` stands in for the numeric
        # identifier printed for the owning container.
        exp = ("1 interval feature\n"
               "------------------\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagA'}\)")
        self.assertRegex(repr(self.im_empty), exp)

        for gene in ('sagB', 'sagC', 'sagD', 'sagE', 'sagF'):
            self.im_empty.add([(3, 4)], metadata={'gene': gene})
        exp = ("6 interval features\n"
               "-------------------\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagA'}\)\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagB'}\)\n"
               r"...\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagE'}\)\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagF'}\)")
        self.assertRegex(repr(self.im_empty), exp)