예제 #1
0
def test_merge_equals_with_dupes():
    """Check merge_equals() on trees that hold intervals with duplicated bounds.

    After merging, an interval with the duplicated bounds is still present,
    but neither of the original data values is attached to it.
    """
    def assert_collapsed(tree, begin, end, *old_labels):
        # The bounds survive the merge, each original data value does not.
        assert tree.containsi(begin, end)
        for label in old_labels:
            assert not tree.containsi(begin, end, label)

    t = IntervalTree.from_tuples(data.ivs1.data)
    orig = IntervalTree.from_tuples(data.ivs1.data)
    assert orig == t

    # --- a single duplicated interval ---
    assert t.containsi(4, 7, '[4,7)')
    t.addi(4, 7, 'foo')
    assert len(t) == len(orig) + 1
    assert orig != t

    t.merge_equals()
    t.verify()
    assert t != orig
    assert_collapsed(t, 4, 7, 'foo', '[4,7)')

    # --- two duplicated intervals ---
    t = IntervalTree.from_tuples(data.ivs1.data)
    t.addi(4, 7, 'foo')
    assert t.containsi(10, 12, '[10,12)')
    t.addi(10, 12, 'bar')
    assert len(t) == len(orig) + 2
    assert t != orig

    t.merge_equals()
    t.verify()
    assert t != orig
    assert_collapsed(t, 4, 7, 'foo', '[4,7)')
    assert_collapsed(t, 10, 12, 'bar', '[10,12)')
예제 #2
0
def find_5prime_for_inversion(start, end, gtj: list):
    """
    We have two canonical transcripts, and we want to find out which one is the 5' and the 3'
    :param start: beginning of the inversion
    :param end: end of inversion (not used by the current logic; kept for callers)
    :param gtj: list of ENSEMBL genes; only the first two entries are used
    :return: tuple with 5' as the first, and 3' as the second
    """
    first, second = gtj[0], gtj[1]

    # Whole-gene interval of the first gene only; the second gene's interval
    # is never queried, so it is not built.
    first_iv = IntervalTree.from_tuples([(first.exons.begin(),
                                          first.exons.end())])
    start_in_first = bool(first_iv.at(start))
    first_is_forward = bool(first.strand > 0)

    # The given order is kept when the first gene is forward and contains the
    # start point, or is reverse and does not contain it; otherwise swap.
    if first_is_forward == start_in_first:
        return (first, second)
    return (second, first)
예제 #3
0
def test_merge_overlaps_reducer_with_initializer():
    """merge_overlaps() with a list-appending reducer seeded by an empty list."""
    def append_to_list(old, new):
        return old + [new]

    # Nothing to merge in an empty tree.
    empty = IntervalTree()
    empty.merge_overlaps(data_reducer=append_to_list, data_initializer=[])
    empty.verify()
    assert not empty

    # A lone interval ends up with its data wrapped in a list.
    single = IntervalTree.from_tuples([(1, 2, 'hello')])
    single.merge_overlaps(data_reducer=append_to_list, data_initializer=[])
    single.verify()
    assert len(single) == 1
    assert sorted(single) == [Interval(1, 2, ['hello'])]

    # Several intervals separated by one gap: two merged intervals remain.
    tree = IntervalTree.from_tuples(data.ivs1.data)
    tree.merge_overlaps(data_reducer=append_to_list, data_initializer=[])
    tree.verify()
    assert len(tree) == 2
    merged_labels = [
        '[4,7)',
        '[5,9)',
        '[6,10)',
        '[8,10)',
        '[8,15)',
        '[10,12)',
        '[12,14)',
        '[14,15)',
    ]
    assert sorted(tree) == [
        Interval(1, 2, ['[1,2)']),
        Interval(4, 15, merged_labels),
    ]
예제 #4
0
def find_5prime_for_inversion(start, end, gtj: list):
    """
    We have two canonical transcripts, and we want to find out which one is the 5' and the 3' .
    For inversions we will have actually two valid fusions, though we do not know which one is
    the correct one. Nevertheless, we are reporting both.
    :param start: beginning of the inversion
    :param end: end of inversion (not used by the current logic; kept for callers)
    :param gtj: list of ENSEMBL genes; only the first two entries are used
    :return: list with two tuples [(5',3'), (5',3') ]; the second tuple is the first one reversed
    """
    first, second = gtj[0], gtj[1]

    # Whole-gene interval of the first gene only; the second gene's interval
    # is never queried, so it is not built.
    first_iv = IntervalTree.from_tuples([(first.exons.begin(),
                                          first.exons.end())])
    start_in_first = bool(first_iv.at(start))
    first_is_forward = bool(first.strand > 0)

    # The given order is kept when the first gene is forward and contains the
    # start point, or is reverse and does not contain it; otherwise swap.
    if first_is_forward == start_in_first:
        gene_tuple = (first, second)
    else:
        gene_tuple = (second, first)
    return [gene_tuple, (gene_tuple[1], gene_tuple[0])]
예제 #5
0
def test_merge_equals_reducer_wo_initializer():
    """merge_equals() with a string-joining reducer and no data initializer."""
    def join_labels(old, new):
        return "%s, %s" % (old, new)

    # Empty tree: stays empty.
    empty = IntervalTree()
    empty.merge_equals(data_reducer=join_labels)
    empty.verify()
    assert not empty

    # Single interval: left untouched.
    single = IntervalTree.from_tuples([(1, 2, 'hello')])
    single.merge_equals(data_reducer=join_labels)
    single.verify()
    assert len(single) == 1
    assert sorted(single) == [Interval(1, 2, 'hello')]

    # Many intervals without equal bounds: tree is unchanged.
    tree = IntervalTree.from_tuples(data.ivs1.data)
    reference = IntervalTree.from_tuples(data.ivs1.data)
    tree.merge_equals(data_reducer=join_labels)
    tree.verify()
    assert len(tree) == len(reference)
    assert tree == reference

    # Many intervals plus one duplicate of [4,7): data values are joined.
    tree = IntervalTree.from_tuples(data.ivs1.data)
    reference = IntervalTree.from_tuples(data.ivs1.data)
    tree.addi(4, 7, 'foo')
    tree.merge_equals(data_reducer=join_labels)
    tree.verify()
    assert len(tree) == len(reference)
    assert tree != reference
    for stale_label in ('foo', '[4,7)'):
        assert not tree.containsi(4, 7, stale_label)
    assert tree.containsi(4, 7, '[4,7), foo')
예제 #6
0
def test_merge_neighbors_with_gap_nonstrict():
    """merge_neighbors(strict=False) on gapped data for distance 1 and distance 2."""
    def join_labels(old, new):
        return "%s, %s" % (old, new)

    def merged_sorted(distance):
        # Build a fresh tree, merge with the requested distance and validate it.
        tree = IntervalTree.from_tuples(data.ivs1.data)
        tree.merge_neighbors(
            data_reducer=join_labels, distance=distance, strict=False)
        tree.verify()
        return tree

    # distance=1: the gap after [1,2) is kept, so two intervals remain
    t = merged_sorted(1)
    assert len(t) == 2
    assert sorted(t) == [
        Interval(1, 2, '[1,2)'),
        Interval(
            4, 15,
            '[4,7), [5,9), [6,10), [8,10), [8,15), [10,12), [12,14), [14,15)'),
    ]

    # distance=2: everything collapses into a single interval
    t = merged_sorted(2)
    assert len(t) == 1
    assert sorted(t) == [
        Interval(
            1, 15,
            '[1,2), [4,7), [5,9), [6,10), [8,10), [8,15), [10,12), [12,14), [14,15)'
        )
    ]
예제 #7
0
    def __init__(self, covariate, population):
        """
        Set up population-weighted interpolation of a covariate.
        :param covariate: (pd.DataFrame)
        :param population: (pd.DataFrame)
        """
        key_columns = ['location_id', 'sex_id', 'year_id', 'age_group_id']
        # age_lower is not unique to age_group_id, so it is added as the last sort key
        ordering = key_columns + ['age_lower']

        self.covariate = covariate.sort_values(by=ordering)
        self.population = population.sort_values(by=ordering)

        self.location_ids = self.covariate.location_id.unique()

        # One interval per covariate row, tagged with its age group id.
        age_rows = self.covariate[['age_lower', 'age_upper', 'age_group_id']].values
        self.age_intervals = IntervalTree.from_tuples(age_rows)

        # One unit-length interval per distinct year, tagged with the year itself.
        year_rows = [(year, year + 1, year)
                     for year in self.covariate.year_id.unique()]
        self.time_intervals = IntervalTree.from_tuples(year_rows)

        # Lookup tables keyed by (location_id, sex_id, year_id, age_group_id).
        cov_keys = map(tuple, self.covariate[key_columns].values.tolist())
        self.dict_cov = dict(zip(cov_keys, self.covariate['mean_value'].values))

        pop_keys = map(tuple, self.population[key_columns].values.tolist())
        self.dict_pop = dict(zip(pop_keys, self.population['population'].values))
예제 #8
0
def test_merge_overlaps_reducer_with_initializer():
    """merge_overlaps() with a list-appending reducer seeded by an empty list."""
    def append_to_list(old, new):
        return old + [new]

    # Nothing to merge in an empty tree.
    empty = IntervalTree()
    empty.merge_overlaps(data_reducer=append_to_list, data_initializer=[])
    empty.verify()
    assert not empty

    # A lone interval ends up with its data wrapped in a list.
    single = IntervalTree.from_tuples([(1, 2, 'hello')])
    single.merge_overlaps(data_reducer=append_to_list, data_initializer=[])
    single.verify()
    assert len(single) == 1
    assert sorted(single) == [Interval(1, 2, ['hello'])]

    # Several intervals separated by one gap: two merged intervals remain.
    tree = IntervalTree.from_tuples(data.ivs1.data)
    tree.merge_overlaps(data_reducer=append_to_list, data_initializer=[])
    tree.verify()
    assert len(tree) == 2
    merged_labels = [
        '[4,7)',
        '[5,9)',
        '[6,10)',
        '[8,10)',
        '[8,15)',
        '[10,12)',
        '[12,14)',
        '[14,15)',
    ]
    assert sorted(tree) == [
        Interval(1, 2, ['[1,2)']),
        Interval(4, 15, merged_labels),
    ]
예제 #9
0
def test_merge_overlaps_reducer_wo_initializer():
    """merge_overlaps() with a string-joining reducer and no data initializer."""
    def join_labels(old, new):
        return "%s, %s" % (old, new)

    # Empty tree: stays empty.
    empty = IntervalTree()
    empty.merge_overlaps(data_reducer=join_labels)
    empty.verify()
    assert not empty

    # Single interval: data is kept as-is.
    single = IntervalTree.from_tuples([(1, 2, 'hello')])
    single.merge_overlaps(data_reducer=join_labels)
    single.verify()
    assert len(single) == 1
    assert sorted(single) == [Interval(1, 2, 'hello')]

    # Many intervals with one gap: two merged intervals remain.
    tree = IntervalTree.from_tuples(data.ivs1.data)
    tree.merge_overlaps(data_reducer=join_labels)
    tree.verify()
    assert len(tree) == 2
    expected = [
        Interval(1, 2, '[1,2)'),
        Interval(
            4, 15,
            '[4,7), [5,9), [6,10), [8,10), [8,15), [10,12), [12,14), [14,15)'),
    ]
    assert sorted(tree) == expected
예제 #10
0
def test_merge_overlaps_reducer_wo_initializer():
    """merge_overlaps() with a string-joining reducer and no data initializer."""
    def join_labels(old, new):
        return "%s, %s" % (old, new)

    # Empty tree: stays empty.
    empty = IntervalTree()
    empty.merge_overlaps(data_reducer=join_labels)
    empty.verify()
    assert not empty

    # Single interval: data is kept as-is.
    single = IntervalTree.from_tuples([(1, 2, 'hello')])
    single.merge_overlaps(data_reducer=join_labels)
    single.verify()
    assert len(single) == 1
    assert sorted(single) == [Interval(1, 2, 'hello')]

    # Many intervals with one gap: two merged intervals remain.
    tree = IntervalTree.from_tuples(data.ivs1.data)
    tree.merge_overlaps(data_reducer=join_labels)
    tree.verify()
    assert len(tree) == 2
    expected = [
        Interval(1, 2, '[1,2)'),
        Interval(
            4, 15,
            '[4,7), [5,9), [6,10), [8,10), [8,15), [10,12), [12,14), [14,15)'),
    ]
    assert sorted(tree) == expected
예제 #11
0
def test_merge_equals_with_dupes():
    """Check merge_equals() on trees that hold intervals with duplicated bounds.

    After merging, an interval with the duplicated bounds is still present,
    but neither of the original data values is attached to it.
    """
    def assert_collapsed(tree, begin, end, *old_labels):
        # The bounds survive the merge, each original data value does not.
        assert tree.containsi(begin, end)
        for label in old_labels:
            assert not tree.containsi(begin, end, label)

    t = IntervalTree.from_tuples(data.ivs1.data)
    orig = IntervalTree.from_tuples(data.ivs1.data)
    assert orig == t

    # --- a single duplicated interval ---
    assert t.containsi(4, 7, '[4,7)')
    t.addi(4, 7, 'foo')
    assert len(t) == len(orig) + 1
    assert orig != t

    t.merge_equals()
    t.verify()
    assert t != orig
    assert_collapsed(t, 4, 7, 'foo', '[4,7)')

    # --- two duplicated intervals ---
    t = IntervalTree.from_tuples(data.ivs1.data)
    t.addi(4, 7, 'foo')
    assert t.containsi(10, 12, '[10,12)')
    t.addi(10, 12, 'bar')
    assert len(t) == len(orig) + 2
    assert t != orig

    t.merge_equals()
    t.verify()
    assert t != orig
    assert_collapsed(t, 4, 7, 'foo', '[4,7)')
    assert_collapsed(t, 10, 12, 'bar', '[10,12)')
예제 #12
0
def test_merge_equals_wo_dupes():
    """merge_equals() leaves a tree without duplicated bounds unchanged."""
    tree = IntervalTree.from_tuples(data.ivs1.data)
    reference = IntervalTree.from_tuples(data.ivs1.data)
    assert reference == tree

    tree.merge_equals()
    tree.verify()
    assert reference == tree
예제 #13
0
def test_intersection():
    """Exercise IntervalTree.intersection() against empty, identical,
    disjoint and partially overlapping trees."""
    a = IntervalTree.from_tuples(data.ivs1.data)
    b = IntervalTree.from_tuples(data.ivs2.data)
    empty = IntervalTree()

    # Intersecting anything with the empty tree gives the empty tree.
    for tree in (a, b, empty):
        assert tree.intersection(empty) == empty

    # Intersecting a tree with itself gives the same tree.
    for tree in (a, b):
        assert tree.intersection(tree) == tree

    # Commutativity when the result is empty.
    ab = a.intersection(b)
    ba = b.intersection(a)
    ab.verify()
    ba.verify()
    assert ab == ba
    assert len(ab) == 0  # no overlaps, so empty tree

    # Commutativity on non-overlapping sets: intersecting the union with
    # either operand must give that operand back.
    ab = a.union(b)
    ba = b.union(a)

    aba = ab.intersection(a)
    abb = ab.intersection(b)
    bab = ba.intersection(b)
    baa = ba.intersection(a)
    for result in (aba, abb, bab, baa):
        result.verify()
    assert aba == a
    assert abb == b
    assert bab == b
    assert baa == a

    # Commutativity with overlapping sets.
    c = IntervalTree.from_tuples(data.ivs3.data)
    bc = b.intersection(c)
    cb = c.intersection(b)
    bc.verify()
    cb.verify()
    assert bc == cb
    assert len(bc) < len(b)
    assert len(bc) < len(c)
    assert len(bc) > 0

    # (13, 23) is in both operands and therefore in the intersection.
    for tree in (b, c, bc):
        assert tree.containsi(13, 23)

    assert not b.containsi(819, 828)
    assert not c.containsi(0, 1)
    assert not bc.containsi(819, 820)
    assert not bc.containsi(0, 1)
예제 #14
0
def test_merge_equals_wo_dupes():
    """merge_equals() leaves a tree without duplicated bounds unchanged."""
    tree = IntervalTree.from_tuples(data.ivs1.data)
    reference = IntervalTree.from_tuples(data.ivs1.data)
    assert reference == tree

    tree.merge_equals()
    tree.verify()

    assert reference == tree
예제 #15
0
def test_emptying_partial():
    """remove_overlap() over an open-ended range empties that side of the tree."""
    # Upper part: everything overlapping [7, end) goes away.
    upper = IntervalTree.from_tuples(data.ivs1.data)
    assert upper[7:]
    upper.remove_overlap(7, upper.end())
    assert not upper[7:]

    # Lower part: everything overlapping [begin, 7) goes away.
    lower = IntervalTree.from_tuples(data.ivs1.data)
    assert lower[:7]
    lower.remove_overlap(lower.begin(), 7)
    assert not lower[:7]
예제 #16
0
def test_emptying_partial():
    """remove_overlap() over an open-ended range empties that side of the tree."""
    # Upper part: everything overlapping [7, end) goes away.
    upper = IntervalTree.from_tuples(data.ivs1.data)
    assert upper[7:]
    upper.remove_overlap(7, upper.end())
    assert not upper[7:]

    # Lower part: everything overlapping [begin, 7) goes away.
    lower = IntervalTree.from_tuples(data.ivs1.data)
    assert lower[:7]
    lower.remove_overlap(lower.begin(), 7)
    assert not lower[:7]
예제 #17
0
def test_tree_bounds():
    """begin()/end() must equal the smallest begin and largest end in the tree."""
    def assert_tree_bounds(t):
        # Seed with an arbitrary member, then fold in every interval.
        seed_begin, seed_end, _ = set(t).pop()
        lowest = min([seed_begin] + [iv.begin for iv in t])
        highest = max([seed_end] + [iv.end for iv in t])
        assert t.begin() == lowest
        assert t.end() == highest

    for tuples in (data.ivs1.data, data.ivs2.data):
        assert_tree_bounds(IntervalTree.from_tuples(tuples))
예제 #18
0
def test_tree_bounds():
    """begin()/end() must equal the smallest begin and largest end in the tree."""
    def assert_tree_bounds(t):
        # Seed with an arbitrary member, then fold in every interval.
        seed_begin, seed_end, _ = set(t).pop()
        lowest = min([seed_begin] + [iv.begin for iv in t])
        highest = max([seed_end] + [iv.end for iv in t])
        assert t.begin() == lowest
        assert t.end() == highest

    for tuples in (data.ivs1.data, data.ivs2.data):
        assert_tree_bounds(IntervalTree.from_tuples(tuples))
예제 #19
0
def test_partial_iter_range():
    """iterOverlap() with no, upper-only and lower-only bounds yields the
    matching intervals in sorted order."""
    def assert_iter(t, limit):
        # No bounds: every interval.
        expected = sorted(t)
        assert list(t.iterOverlap()) == expected

        # Upper bound only: intervals beginning before the limit.
        expected = sorted(iv for iv in t if iv.begin < limit)
        assert list(t.iterOverlap(None, limit)) == expected

        # Lower bound only: intervals ending after the limit.
        expected = sorted(iv for iv in t if iv.end > limit)
        assert list(t.iterOverlap(limit)) == expected

    assert_iter(IntervalTree.from_tuples(data.ivs1.data), 7)
    assert_iter(IntervalTree.from_tuples(data.ivs2.data), -3)
예제 #20
0
def test_partial_get_query():
    """Slice queries with a missing bound return the matching set of intervals."""
    def assert_get(t, limit):
        # t[:] -> every interval
        everything = set(t)
        assert t[:] == everything

        # t[:limit] -> intervals beginning before the limit
        below = {iv for iv in t if iv.begin < limit}
        assert t[:limit] == below

        # t[limit:] -> intervals ending after the limit
        above = {iv for iv in t if iv.end > limit}
        assert t[limit:] == above

    assert_get(IntervalTree.from_tuples(data.ivs1.data), 7)
    assert_get(IntervalTree.from_tuples(data.ivs2.data), -3)
예제 #21
0
def test_partial_get_query():
    """Slice queries with a missing bound return the matching set of intervals."""
    def assert_get(t, limit):
        # t[:] -> every interval
        everything = set(t)
        assert t[:] == everything

        # t[:limit] -> intervals beginning before the limit
        below = {iv for iv in t if iv.begin < limit}
        assert t[:limit] == below

        # t[limit:] -> intervals ending after the limit
        above = {iv for iv in t if iv.end > limit}
        assert t[limit:] == above

    assert_get(IntervalTree.from_tuples(data.ivs1.data), 7)
    assert_get(IntervalTree.from_tuples(data.ivs2.data), -3)
예제 #22
0
def test_end_order_iter():
    """iterOverlap(endOrder=True) yields intervals ordered by endCmp,
    with no, upper-only and lower-only bounds."""
    def compare_ends(a, b):
        return a.endCmp(b)

    def assert_iter(t, limit):
        end_key = cmp_to_key(compare_ends)

        # No bounds: every interval.
        expected = sorted(t, key=end_key)
        assert list(t.iterOverlap(endOrder=True)) == expected

        # Upper bound only: intervals beginning before the limit.
        expected = sorted([iv for iv in t if iv.begin < limit], key=end_key)
        assert list(t.iterOverlap(None, limit, endOrder=True)) == expected

        # Lower bound only: intervals ending after the limit.
        expected = sorted([iv for iv in t if iv.end > limit], key=end_key)
        assert list(t.iterOverlap(limit, endOrder=True)) == expected

    assert_iter(IntervalTree.from_tuples(data.ivs1.data), 7)
    assert_iter(IntervalTree.from_tuples(data.ivs2.data), -3)
예제 #23
0
def test_merge_overlaps_gapless():
    """merge_overlaps() on ivs2: unchanged by default, one interval with strict=False."""
    # Default (strict) merge: the tree still lists exactly the input tuples.
    strict_tree = IntervalTree.from_tuples(data.ivs2.data)
    strict_tree.merge_overlaps()
    strict_tree.verify()
    as_tuples = [(iv.begin, iv.end, iv.data) for iv in sorted(strict_tree)]
    assert as_tuples == data.ivs2.data

    # strict=False: everything collapses into the tree's overall range.
    loose_tree = IntervalTree.from_tuples(data.ivs2.data)
    rng = loose_tree.range()
    loose_tree.merge_overlaps(strict=False)
    loose_tree.verify()
    assert len(loose_tree) == 1
    assert loose_tree.pop() == rng
예제 #24
0
def test_merge_overlaps_gapless():
    """merge_overlaps() on ivs2: unchanged by default, one interval with strict=False."""
    # Default (strict) merge: the tree still lists exactly the input tuples.
    strict_tree = IntervalTree.from_tuples(data.ivs2.data)
    strict_tree.merge_overlaps()
    strict_tree.verify()
    as_tuples = [(iv.begin, iv.end, iv.data) for iv in sorted(strict_tree)]
    assert as_tuples == data.ivs2.data

    # strict=False: everything collapses into the tree's overall range.
    loose_tree = IntervalTree.from_tuples(data.ivs2.data)
    rng = loose_tree.range()
    loose_tree.merge_overlaps(strict=False)
    loose_tree.verify()
    assert len(loose_tree) == 1
    assert loose_tree.pop() == rng
예제 #25
0
    def from_gtf(
            cls,
            gtf_path,  # type: pathlib.Path
            chromosomes=None,  # type: List[str]
            record_filter=None  # type: Callable[[Any], bool]
    ):  # type: (...) -> TranscriptReference
        """Builds a Reference instance from the given GTF file.

        Builds one transcript lookup tree per chromosome and one exon lookup
        tree per transcript id, then passes both mappings to ``cls``.

        :param gtf_path: path to the GTF file, opened with pysam.TabixFile.
        :param chromosomes: chromosomes to load; defaults to all contigs
            reported by the file.
        :param record_filter: optional predicate; only records for which it
            returns a true value are used.
        :return: ``cls(transcript_trees, exon_trees)``.
        """

        # Build the trees.
        transcript_trees = {}
        exon_trees = {}

        def keyfunc(rec):
            return rec.transcript_id

        # Open gtf file; closed in the finally clause so the handle is not
        # leaked, even if parsing fails part-way.
        gtf = pysam.TabixFile(native_str(gtf_path), parser=pysam.asGTF())
        try:
            if chromosomes is None:
                chromosomes = gtf.contigs

            for chrom in chromosomes:
                # Collect exons and transcripts.
                transcripts = []
                exons = []

                records = gtf.fetch(reference=chrom)

                if record_filter is not None:
                    records = (rec for rec in records if record_filter(rec))

                for record in records:
                    if record.feature == 'transcript':
                        transcripts.append(cls._record_to_transcript(record))
                    elif record.feature == 'exon':
                        exons.append(cls._record_to_exon(record))

                # Build transcript lookup tree.
                transcript_trees[chrom] = IntervalTree.from_tuples(
                    (tr.start, tr.end, tr) for tr in transcripts)

                # Build exon lookup trees; groupby needs its input sorted by
                # the same key.
                grouped = itertools.groupby(
                    sorted(exons, key=keyfunc), key=keyfunc)

                for tr_id, grp in grouped:
                    exon_trees[tr_id] = IntervalTree.from_tuples(
                        (exon.start, exon.end, exon) for exon in grp)
        finally:
            gtf.close()

        return cls(transcript_trees, exon_trees)
예제 #26
0
파일: util.py 프로젝트: sofiaff/imfusion
    def from_gtf(
        cls,
        gtf_path,  # type: pathlib.Path
        chromosomes=None,  # type: List[str]
        record_filter=None  # type: Callable[[Any], bool]
    ):  # type: (...) -> TranscriptReference
        """Builds a Reference instance from the given GTF file.

        Builds one transcript lookup tree per chromosome and one exon lookup
        tree per transcript id, then passes both mappings to ``cls``.

        :param gtf_path: path to the GTF file, opened with pysam.TabixFile.
        :param chromosomes: chromosomes to load; defaults to all contigs
            reported by the file.
        :param record_filter: optional predicate; only records for which it
            returns a true value are used.
        :return: ``cls(transcript_trees, exon_trees)``.
        """

        # Build the trees.
        transcript_trees = {}
        exon_trees = {}

        def keyfunc(rec):
            return rec.transcript_id

        # Open gtf file; closed in the finally clause so the handle is not
        # leaked, even if parsing fails part-way.
        gtf = pysam.TabixFile(native_str(gtf_path), parser=pysam.asGTF())
        try:
            if chromosomes is None:
                chromosomes = gtf.contigs

            for chrom in chromosomes:
                # Collect exons and transcripts.
                transcripts = []
                exons = []

                records = gtf.fetch(reference=chrom)

                if record_filter is not None:
                    records = (rec for rec in records if record_filter(rec))

                for record in records:
                    if record.feature == 'transcript':
                        transcripts.append(cls._record_to_transcript(record))
                    elif record.feature == 'exon':
                        exons.append(cls._record_to_exon(record))

                # Build transcript lookup tree.
                transcript_trees[chrom] = IntervalTree.from_tuples(
                    (tr.start, tr.end, tr) for tr in transcripts)

                # Build exon lookup trees; groupby needs its input sorted by
                # the same key.
                grouped = itertools.groupby(
                    sorted(exons, key=keyfunc), key=keyfunc)

                for tr_id, grp in grouped:
                    exon_trees[tr_id] = IntervalTree.from_tuples(
                        (exon.start, exon.end, exon) for exon in grp)
        finally:
            gtf.close()

        return cls(transcript_trees, exon_trees)
예제 #27
0
def test_merge_neighbors_reducer_with_initializer():
    """merge_neighbors() with a list-appending reducer seeded by an empty list."""
    def append_to_list(old, new):
        return old + [new]

    # Empty tree: stays empty.
    empty = IntervalTree()
    empty.merge_neighbors(data_reducer=append_to_list, data_initializer=[])
    empty.verify()
    assert not empty

    # Single interval: its data ends up wrapped in a list.
    single = IntervalTree.from_tuples([(1, 2, 'hello')])
    single.merge_neighbors(data_reducer=append_to_list, data_initializer=[])
    single.verify()
    assert len(single) == 1
    assert sorted(single) == [Interval(1, 2, ['hello'])]

    # Touching intervals without any gap: merged into one interval.
    touching = (
        (1, 2, '[1,2)'),
        (2, 3, '[2,3)'),
        (3, 4, '[3,4)'),
    )
    tree = IntervalTree.from_tuples(touching)
    tree.merge_neighbors(data_reducer=append_to_list, data_initializer=[])
    tree.verify()
    assert len(tree) == 1
    for lo, hi, labels in tree.items():
        assert lo == 1
        assert hi == 4
        assert labels == ['[1,2)', '[2,3)', '[3,4)']

    # Gapped intervals with distance=2: only [1,2) and [4,6) are joined.
    gapped = (
        (1, 2, '[1,2)'),
        (4, 6, '[4,6)'),
        (5, 8, '[5,8)'),
        (13, 15, '[13,15)'),
    )
    tree = IntervalTree.from_tuples(gapped)
    tree.merge_neighbors(
        data_reducer=append_to_list, data_initializer=[], distance=2)
    tree.verify()
    assert len(tree) == 3
    expected = [
        Interval(1, 6, ['[1,2)', '[4,6)']),
        Interval(5, 8, ['[5,8)']),
        Interval(13, 15, ['[13,15)']),
    ]
    assert sorted(tree) == expected
예제 #28
0
파일: turn.py 프로젝트: kingfener/dscore
def merge_turns(turns):
    """Merge overlapping turns by same speaker within each file.

    Turns are returned grouped by file id and then speaker id, both in sorted
    order. A speaker's turns are replaced by freshly built ``Turn`` objects
    (sorted by onset and offset) only when merging reduced their number;
    otherwise the original turn objects are passed through in input order.
    """
    # Bucket turns by (file, speaker) and remember each file's speakers.
    turns_by_key = defaultdict(list)
    speakers_by_file = defaultdict(set)
    for turn in turns:
        turns_by_key[(turn.file_id, turn.speaker_id)].append(turn)
        speakers_by_file[turn.file_id].add(turn.speaker_id)

    # Merge separately within each file and for each speaker.
    merged = []
    for file_id in sorted(speakers_by_file):
        for speaker_id in sorted(speakers_by_file[file_id]):
            current = turns_by_key[(file_id, speaker_id)]
            tree = IntervalTree.from_tuples(
                [(turn.onset, turn.offset) for turn in current])
            n_before = len(tree)
            tree.merge_overlaps()
            if len(tree) < n_before:
                rebuilt = [
                    Turn(iv.begin, iv.end, speaker_id=speaker_id,
                         file_id=file_id)
                    for iv in tree
                ]
                current = sorted(rebuilt, key=lambda x: (x.onset, x.offset))
                warn('Merging overlapping speaker turns. '
                     'FILE: %s, SPEAKER: %s' % (file_id, speaker_id))
            merged.extend(current)

    return merged
예제 #29
0
def make_annot(args):
    """
    Create binary annotations from the 0.1, 0.01 and 0.001 p-value strata of the summary statistics.

    For each chromosome 1-22, the BP position of every SNP in ``args.sumstats_file``
    whose PVAL is below a threshold is widened by ``args.window`` on both sides and
    overlapping windows are merged. Each marker in the chromosome's ``args.annot`` file
    is then flagged 1 if its BP falls inside a window, 0 otherwise. The result is saved
    per chromosome as a gzip-compressed, tab-separated file at
    ``args.output_file.format(chri)``.

    :param args: namespace providing ``sumstats_file``, ``annot``, ``output_file``,
        ``window`` and ``force``
    """
    chromosomes = range(1, 23)
    thresholds = [(0.1, '.1'), (0.01, '.01'), (0.001, '.001')]

    check_input_file(args.sumstats_file)
    for chri in chromosomes:
        check_output_file(args.output_file.format(chri), args.force)

    print('Reading summary statistics file {}...'.format(args.sumstats_file))
    # sep=r'\s+' is the documented replacement for the deprecated delim_whitespace=True.
    sumstats = pd.read_csv(args.sumstats_file, sep=r'\s+', usecols=['PVAL', 'CHR', 'BP'])
    print('Done, read {} SNPs.'.format(sumstats.shape[0]))

    for chri in chromosomes:
        print('Processing chromosome {}...'.format(chri))
        df = pd.read_csv(args.annot.format(chri), sep=r'\s+')
        df = df[['CHR', 'BP', 'SNP', 'CM']].copy()

        # The per-chromosome subset does not depend on the threshold, so build it once.
        sumstatsCHR = sumstats[sumstats.CHR == chri].copy(deep=True)

        for pthresh, label in thresholds:
            below = sumstatsCHR.PVAL < pthresh
            significant_bp = sumstatsCHR.BP[below]
            print('{} markers, {} of them are on chr {}, {} of them have p-value below {}'.format(sumstats.shape[0], sumstatsCHR.shape[0], chri, below.sum(), pthresh))
            itree = IntervalTree.from_tuples(zip(significant_bp - args.window, significant_bp + args.window))
            itree.merge_overlaps()

            # No SNP below the threshold gives an empty tree; report 0 instead of dividing by zero.
            n_intervals = len(itree)
            mean_length = sum(i.length() for i in itree) / n_intervals if n_intervals else 0
            print('Found {} intervals, average length {}'.format(n_intervals, mean_length))

            annot_binary = [int(bool(itree[p])) for p in df.BP]
            df['PVAL{}'.format(label)] = annot_binary

            # Same guard for an annotation file without any rows.
            n_markers = len(annot_binary)
            n_hits = sum(annot_binary)
            percent = 100 * n_hits / n_markers if n_markers else 0
            print('{} markers out of {} ({}%) belongs to the annotation'.format(n_hits, n_markers, percent))
        df.to_csv(args.output_file.format(chri), index=False, sep='\t', compression='gzip')
        print('Results saved to {}'.format(args.output_file.format(chri)))
예제 #30
0
def test_span():
    """span() is 0 for an empty tree and end() - begin() otherwise."""
    empty = IntervalTree()
    assert empty.span() == 0

    tree = IntervalTree.from_tuples(data.ivs1.data)
    width = tree.end() - tree.begin()
    assert tree.span() == width
    assert tree.span() == 14
예제 #31
0
def test_merge_overlaps_with_gap():
    """merge_overlaps() without a reducer keeps [1,2) and merges the rest into (4, 15)."""
    tree = IntervalTree.from_tuples(data.ivs1.data)

    tree.merge_overlaps()
    tree.verify()
    assert len(tree) == 2

    expected = IntervalTree([Interval(1, 2, '[1,2)'), Interval(4, 15)])
    assert tree == expected
예제 #32
0
def test_delete():
    """remove() raises for missing intervals, discard() ignores them, and both
    delete intervals that are present; remove_overlap() clears a point."""
    t = IntervalTree.from_tuples(data.ivs1.data)

    # remove() of an interval that is not in the tree must raise ValueError.
    for missing in (Interval(1, 3, "Doesn't exist"),
                    Interval(500, 1000, "Doesn't exist")):
        try:
            t.remove(missing)
        except ValueError:
            continue
        raise AssertionError("Expected ValueError")

    # discard() of missing intervals must leave the structure untouched.
    structure_before = t.print_structure(True)
    t.discard(Interval(1, 3, "Doesn't exist"))
    t.discard(Interval(500, 1000, "Doesn't exist"))
    assert structure_before == t.print_structure(True)

    # remove() of an existing interval.
    assert match.set_data(t[14]) == {'[8,15)', '[14,15)'}
    t.remove(Interval(14, 15, '[14,15)'))
    assert match.set_data(t[14]) == {'[8,15)'}
    t.verify()

    # discard() of an existing interval.
    t.discard(Interval(8, 15, '[8,15)'))
    assert match.set_data(t[14]) == set()
    t.verify()

    # remove_overlap() at a point.
    assert t[5]
    t.remove_overlap(5)
    t.verify()
    assert not t[5]
예제 #33
0
    def getTrackData(self, chrom_len, gene2info):
        """Build per-strand interval trees from gene2info and derive track data.

        Note: ``gene2info`` is modified in place. For every entry, each value
        in ``entry['interval']`` is replaced by
        ``self.tree2json(value, False)``.

        :param chrom_len: passed through to getRangesFromTree / getBlocksFromTree
        :param gene2info: mapping gene -> list of entry dicts; each entry is
            read for 'r_id', 'interval' and 'annot' ('strand', 'start', 'end')
        :return: (interval2genes, interval2blocks, rainbow2gene), where
            rainbow2gene maps each entry's 'r_id' to its gene
        """
        strands = ('+', '-')
        rainbow2gene = {}
        strand2info = {strand: [] for strand in strands}

        for gene, entries in gene2info.items():
            for curr in entries:
                rainbow2gene[curr['r_id']] = gene

                # Convert each interval subtype in place.
                intervals = curr['interval']
                for subtype in intervals:
                    intervals[subtype] = self.tree2json(intervals[subtype], False)

                # Entries whose strand is neither '+' nor '-' are skipped.
                annot = curr['annot']
                if annot['strand'] in strands:
                    strand2info[annot['strand']].append(
                        [annot['start'], annot['end'], curr['r_id']])

        trees = {
            strand: IntervalTree.from_tuples(strand2info[strand])
            for strand in strands
        }

        interval2genes = self.getRangesFromTree(chrom_len, trees)
        interval2blocks = self.getBlocksFromTree(chrom_len, trees)

        return interval2genes, interval2blocks, rainbow2gene
예제 #34
0
def build_capture_trees(bed_file):
    """Read a BED file and return a dict of interval trees keyed by chromosome.

    One tree is built for each of the chromosome names '1'..'22', 'X' and 'Y'.
    Each interval carries the gene name from the fourth BED column as its data.
    """
    column_types = {'chrom': str, 'start': int, 'stop': int, 'gene': str}
    bed_df = pd.read_csv(bed_file,
                         usecols=[0, 1, 2, 3],
                         sep='\t',
                         names=['chrom', 'start', 'stop', 'gene'],
                         dtype=column_types)

    # Intervals exclude the end point, so increment all stops
    bed_df['stop'] += 1

    autosomes = [str(i) for i in range(1, 23)]
    trees = {}
    for chrom in autosomes + ['X', 'Y']:
        # Rows of this chromosome, reduced to the (start, stop, gene) columns.
        on_chrom = bed_df[bed_df.chrom == chrom][['start', 'stop', 'gene']]
        trees[chrom] = IntervalTree.from_tuples(
            [tuple(row) for row in on_chrom.values])

    return trees
예제 #35
0
파일: ctg.py 프로젝트: jrderuiter/im-fusion
def _subset_to_windows(
        insertions,  # type: List[Insertion]
        gene_windows  # type: Dict[str, Tuple[str, int, int]]
):  # type: (...) -> List[Insertion]
    """Keeps insertions whose gene has a window and whose position
    overlaps a window on the same chromosome."""

    # One lookup tree per chromosome, built from the window tuples with
    # the leading chromosome element dropped.
    windows_by_chrom = itertools.groupby(
        sorted(gene_windows.values()), operator.itemgetter(0))
    trees = {}
    for chrom, windows in windows_by_chrom:
        trees[chrom] = IntervalTree.from_tuples(
            window[1:] for window in windows)

    selected = []
    for ins in insertions:
        # Only genes with a known gene window qualify.
        if ins.metadata['gene_id'] not in gene_windows:
            continue
        # A KeyError (e.g. chromosome without any tree) counts as no overlap.
        try:
            hit = trees[ins.seqname].overlaps(ins.position)
        except KeyError:
            hit = False
        if hit:
            selected.append(ins)
    return selected
예제 #36
0
def merge_turns(turns):
    """Merge overlapping turns by same speaker within each file.

    Turns are sorted by (file_id, speaker_id) before grouping, because
    ``groupby`` only groups adjacent items; without the sort, turns of one
    speaker that are not contiguous in the input would be split into several
    groups and never merged with each other. The sort is stable, so turns
    keep their input order within a group.

    A speaker's turns are replaced by freshly built ``Turn`` objects (sorted
    by onset and offset) only when merging reduced their number; otherwise
    the original turn objects are passed through.

    :param turns: iterable of turns with file_id, speaker_id, onset and offset
    :return: list of turns ordered by file id, then speaker id
    """
    def speaker_key(turn):
        return (turn.file_id, turn.speaker_id)

    # Merge separately within each file and for each speaker.
    new_turns = []
    grouped = groupby(sorted(turns, key=speaker_key), speaker_key)
    for (file_id, speaker_id), speaker_turns in grouped:
        speaker_turns = list(speaker_turns)
        speaker_it = IntervalTree.from_tuples(
            [(turn.onset, turn.offset) for turn in speaker_turns])
        n_turns_pre = len(speaker_it)
        speaker_it.merge_overlaps()
        n_turns_post = len(speaker_it)
        if n_turns_post < n_turns_pre:
            speaker_turns = sorted(
                (Turn(intrvl.begin,
                      intrvl.end,
                      speaker_id=speaker_id,
                      file_id=file_id) for intrvl in speaker_it),
                key=lambda x: (x.onset, x.offset))
            warn('Merging overlapping speaker turns. '
                 'FILE: %s, SPEAKER: %s' % (file_id, speaker_id))
        new_turns.extend(speaker_turns)
    return new_turns
예제 #37
0
    def getGene2Info(self, chrom, data):
        """Index the per-strand records of one chromosome.

        Every record gets a running id ``r_id`` (starting at 1, shared across
        strands).

        :param chrom: chromosome name stored in each entry's annotation
        :param data: mapping strand -> iterable of
            ``(start, end, (element, *values))`` records
        :return: tuple ``(gene2info, rainbow2gene, rainbow_tree)`` where
            gene2info maps element -> list of entry dicts,
            rainbow2gene maps r_id -> element, and
            rainbow_tree maps strand -> IntervalTree of (start, end, r_id)
        """
        gene2info = {}
        rainbow2gene = {}
        rainbow_tree = {}

        r_id = 0
        for strand in data:
            tree_tuple = []
            for datum in data[strand]:
                r_id += 1
                start, end, (element, *values) = datum
                tree_tuple.append([start, end, r_id])
                rainbow2gene[r_id] = element
                curr = {
                        'r_id':r_id,
                        'annot':{'chrom':chrom,'strand':strand,'start': start,'end': end,'values': values },
                        'interval':{ '-':[ [start, end] ] }
                        }
                # setdefault replaces the former try / bare-except, which would
                # also have swallowed unrelated errors.
                gene2info.setdefault(element, []).append(curr)
            rainbow_tree[strand] = IntervalTree.from_tuples(tree_tuple)

        return gene2info, rainbow2gene, rainbow_tree
예제 #38
0
def test_span():
    """span() is 0 on an empty tree and end() - begin() on a populated one."""
    assert IntervalTree().span() == 0

    tree = IntervalTree.from_tuples(data.ivs1.data)
    extent = tree.end() - tree.begin()
    assert tree.span() == extent
    assert tree.span() == 14
예제 #39
0
파일: ctg.py 프로젝트: sofiaff/imfusion
def _subset_to_windows(
    insertions,  # type: List[Insertion]
    gene_windows  # type: Dict[str, Tuple[str, int, int]]
):  # type: (...) -> List[Insertion]
    """Keep insertions whose gene has a window and whose position hits one.

    An insertion is retained when its ``metadata['gene_id']`` is a key of
    ``gene_windows`` and its ``position`` overlaps any window interval on
    the same ``seqname``.
    """
    # One interval tree per chromosome, built from the sorted window tuples
    # (the first tuple field is the chromosome, the rest is the interval).
    lookup = {}
    windows_by_chrom = itertools.groupby(
        sorted(gene_windows.values()), operator.itemgetter(0))
    for chrom, windows in windows_by_chrom:
        lookup[chrom] = IntervalTree.from_tuples(
            window[1:] for window in windows)

    def _hits_window(insertion):
        # Chromosomes without any window simply never match.
        try:
            return lookup[insertion.seqname].overlaps(insertion.position)
        except KeyError:
            return False

    selected = []
    for insertion in insertions:
        if insertion.metadata['gene_id'] not in gene_windows:
            continue
        if _hits_window(insertion):
            selected.append(insertion)
    return selected
예제 #40
0
def test_difference_operator():
    """``minuend - subtrahend`` equals the intervals popped off the copy."""
    minuend = IntervalTree.from_tuples(data.ivs1.data)
    assert isinstance(minuend, IntervalTree)

    # Pop two intervals off a copy; exactly those two must come back as the
    # difference.
    subtrahend = minuend.copy()
    first_popped = subtrahend.pop()
    expected_difference = IntervalTree([first_popped])
    second_popped = subtrahend.pop()
    expected_difference.add(second_popped)

    for checked in (minuend, subtrahend, expected_difference):
        checked.verify()

    assert len(minuend) - len(subtrahend) == len(expected_difference)

    for interval in expected_difference:
        assert interval in minuend
        assert interval not in subtrahend

    difference = minuend - subtrahend
    difference.verify()

    for interval in difference:
        assert interval in minuend
        assert interval in expected_difference
        assert interval not in subtrahend

    assert difference == expected_difference
예제 #41
0
    def getGene2Info(self, genes):
        """Group per-gene annotation and merged interval trees by gene id.

        Args:
            genes: Iterable of dicts with an ``'annot'`` mapping (which must
                contain ``'gene_id'``) and a ``'parts'`` mapping of
                subtype -> sequence of records whose first two items are
                the interval bounds.

        Returns:
            Dict mapping gene id -> list of info dicts. Each info dict has
            ``'r_id'`` (position of the gene in ``genes``), ``'annot'`` (the
            input annotation without ``'gene_id'``, plus ``'start'``/``'end'``
            when the gene has parts) and ``'interval'`` (subtype -> merged
            ``IntervalTree``).
        """
        gene2info = {}

        for rainbow_id, gene in enumerate(genes):
            annot = gene['annot']
            gene_id = annot['gene_id']
            curr_info = {
                'r_id': rainbow_id,
                'annot': {
                    key: value
                    for key, value in annot.items() if key != 'gene_id'
                },
                'interval': {},
            }

            for subtype, parts in gene['parts'].items():
                curr_tree = IntervalTree.from_tuples(
                    [curr[:2] for curr in parts])
                curr_tree.merge_overlaps()
                curr_info['interval'][subtype] = curr_tree
                # NOTE(review): start/end are overwritten for every subtype,
                # so the last subtype iterated wins -- confirm this is the
                # intended extent.
                curr_info['annot']['start'] = curr_tree.begin()
                curr_info['annot']['end'] = curr_tree.end()

            # setdefault instead of a bare try/except so unrelated errors
            # are no longer swallowed.
            gene2info.setdefault(gene_id, []).append(curr_info)

        return gene2info
def extract_intervals_for_genes_from_gff(genes: Set[str],
                                         gff_stream: TextIO,
                                         padding: int = 0) -> IntervalTree:
    """Build an interval tree of the requested genes found in a GFF stream.

    Only rows whose third column is ``gene`` (case-insensitive) are
    considered. A row's name is its ``gene`` attribute, falling back to
    ``Name``; rows whose name is not in ``genes`` are ignored.

    Args:
        genes: Names of the genes to keep.
        gff_stream: Text stream of tab-separated GFF rows.
        padding: Amount subtracted from each start and added to each end.

    Returns:
        An ``IntervalTree`` with one ``(start, end, (name, strand))``
        interval per matching row.
    """
    intervals = []
    for row in map(str.rstrip, gff_stream):
        if row.startswith("#") or not row:
            continue
        fields = row.split("\t")
        if fields[2].lower() != "gene":
            continue

        attributes = attributes_dict_from_str(fields[8])
        name = attributes.get("gene", attributes.get("Name", None))
        if name is None:
            # Use .get so a record lacking an ID as well does not raise
            # KeyError while we are only trying to report it.
            feature_id = attributes.get("ID", "<unknown>")
            logger.warning(f"No gene/Name attribute for ID {feature_id}")
            continue

        if name not in genes:
            continue

        start = (int(fields[3]) -
                 1) - padding  # GFF start is 1-based inclusive
        end = int(fields[4]) + padding  # GFF end is 1-based inclusive
        strand = fields[6]
        intervals.append((start, end, (name, strand)))

    return IntervalTree.from_tuples(intervals)
예제 #43
0
def make_age_intervals(df: Optional[pd.DataFrame] = None,
                       gbd_round_id: Optional[int] = None) -> IntervalTree:
    """
    Makes an interval tree out of age lower and upper for age group IDs.
    The interval tree can be made from an existing data frame with those columns
    or it can be made from getting the full set of age groups from the IHME databases.

    Parameters
    ----------
    df
        Data frame from which to construct the interval tree. Must have the
        columns ['age_group_id', 'age_lower', 'age_upper']. If passed, ignores gbd_round_id.
    gbd_round_id
        The gbd round ID from which to pull the age group metadata which is used
        to construct the interval tree. Ignored if df is specified instead.

    Returns
    -------
    IntervalTree
        One interval per row, spanning ``age_lower`` to ``age_upper`` and
        carrying ``age_group_id`` as its data.

    Raises
    ------
    IhmeIDError
        If neither argument is given, or if ``df`` lacks a required column.
    """
    required_columns = ['age_group_id', 'age_lower', 'age_upper']
    if df is None and gbd_round_id is None:
        # The message parts need explicit separators: adjacent string
        # literals are concatenated with nothing in between.
        raise IhmeIDError(
            "Need to pass either a data frame with columns "
            "['age_group_id', 'age_lower', 'age_upper'] or a valid "
            "gbd_round_id to get the full set of age groups.")
    if df is None:
        df = get_age_group_metadata(gbd_round_id=gbd_round_id)
    else:
        for col in required_columns:
            if col not in df.columns:
                raise IhmeIDError(
                    f"The data frame columns {df.columns} do not contain "
                    f"the required column {col}.")
    age_intervals = IntervalTree.from_tuples(
        df[['age_lower', 'age_upper', 'age_group_id']].values)
    return age_intervals
예제 #44
0
def test_pickle():
    """A tree survives a pickle round trip unchanged and still verifies."""
    original = IntervalTree.from_tuples(data.ivs1.data)

    restored = pickle.loads(pickle.dumps(original))

    assert original == restored
    restored.verify()
예제 #45
0
def optimality_core():
    """Pretty-print the optimality summary matrix for the issue-4 result tree."""
    trees_by_label = {
        'issue4result': IntervalTree.from_tuples(data.issue4_result.data),
    }
    matrix = OptimalityTestMatrix(trees_by_label)
    pprint(matrix.summary_matrix)
예제 #46
0
def test_mismatched_tree_and_membership_set():
    """verify() raises once all_intervals is replaced by a stale snapshot."""
    tree = IntervalTree.from_tuples(data.ivs1.data)
    snapshot = set(tree.all_intervals)
    assert tree.all_intervals == snapshot

    tree.removei(1, 2, '[1,2)')
    assert tree.all_intervals != snapshot

    # Intentionally corrupt the tree: the membership set now still lists the
    # interval that was just removed.
    tree.all_intervals = snapshot
    with pytest.raises(AssertionError):
        tree.verify()
예제 #47
0
def test_emptying_iteration():
    """Removing every interval one at a time leaves a valid, empty tree."""
    tree = IntervalTree.from_tuples(data.ivs1.data)

    # Iterate over a sorted snapshot so removals cannot disturb the loop.
    snapshot = sorted(tree)
    for interval in snapshot:
        tree.remove(interval)
        tree.verify()

    assert not tree
    assert tree.is_empty()
    assert len(tree) == 0
예제 #48
0
def test_overlaps():
    """overlaps() answers both point and range queries on the ivs1 fixture."""
    tree = IntervalTree.from_tuples(data.ivs1.data)

    # Argument tuples: one value is a point query, two values a range query.
    hits = [(1,), (1.5,), (0, 3)]
    misses = [(-3.2,), (0, 1), (2, 4), (4, 2), (3, 0)]

    for query in hits:
        assert tree.overlaps(*query)
    for query in misses:
        assert not tree.overlaps(*query)
예제 #49
0
def test_emptying_clear():
    """clear() empties a populated tree and is harmless on an empty one."""
    tree = IntervalTree.from_tuples(data.ivs1.data)
    assert tree

    tree.clear()
    assert not tree
    assert tree.is_empty()
    assert len(tree) == 0

    # A second clear on the already-empty tree must not crash.
    tree.clear()
예제 #50
0
def tree():
    """Return a tree over ``data`` whose nodes are laid out by hand.

    The node structure is assembled manually, its printed form is checked
    against the expected layout, and it is then installed as the tree's top
    node. The tree is verified before being returned.
    """
    fixture = IntervalTree.from_tuples(data)

    # Root: Node<10.58, depth=3, balance=1> holding Interval(8.65, 13.65)
    root = Node()
    root.x_center = 10.58
    root.s_center = {Interval(*data[0])}
    root.depth = 3
    root.balance = 1

    # Left child: Node<5.66, depth=1, balance=0> holding
    # Interval(3.57, 9.47), Interval(5.38, 10.38), Interval(5.66, 9.66)
    left = Node()
    root.left_node = left
    left.x_center = 5.66
    left.s_center = {Interval(*tup) for tup in data[1:4]}
    left.depth = 1
    left.balance = 0

    # Right child: Node<16.49, depth=2, balance=-1> holding
    # Interval(16.49, 20.83)
    right = Node()
    root.right_node = right
    right.x_center = 16.49
    right.s_center = {Interval(*data[4])}
    right.depth = 2
    right.balance = -1

    # Right child's left child: Node<11.42, depth=1, balance=0> holding
    # Interval(11.42, 16.42)
    right_left = Node()
    right.left_node = right_left
    right_left.x_center = 11.42
    right_left.s_center = {Interval(*data[5])}
    right_left.depth = 1
    right_left.balance = 0

    expected_layout = """\
Node<10.58, depth=3, balance=1>
 Interval(8.65, 13.65)
<:  Node<5.66, depth=1, balance=0>
     Interval(3.57, 9.47)
     Interval(5.38, 10.38)
     Interval(5.66, 9.66)
>:  Node<16.49, depth=2, balance=-1>
     Interval(16.49, 20.83)
    <:  Node<11.42, depth=1, balance=0>
         Interval(11.42, 16.42)
"""
    structure = root.print_structure(tostring=True)
    assert structure == expected_layout

    fixture.top_node = root
    fixture.verify()
    return fixture
예제 #51
0
def test_point_queries():
    """Indexing and at() return the same intervals for a point."""
    tree = IntervalTree.from_tuples(data.ivs1.data)

    expected_by_point = [
        (4, {'[4,7)'}),
        (9, {'[6,10)', '[8,10)', '[8,15)'}),
        (15, set()),
        (5, {'[4,7)', '[5,9)'}),
    ]
    for point, labels in expected_by_point:
        assert match.set_data(tree[point]) == labels
        assert match.set_data(tree.at(point)) == labels

    # A slice is a range query rather than a point query.
    assert match.set_data(tree[4:5]) == {'[4,7)'}
예제 #52
0
 def __init__(self, path, slop=200):
     """Load a tab-separated BED-like file into one interval tree per chromosome.

     Args:
         path: File to read. Column 0 is the chromosome name; columns 1
             and 2 are parsed as integer bounds.
         slop: Amount by which every interval is widened on both sides.

     Each stored interval is ``(max(1, start - slop + 1), end + slop + 1)``.
     Rows for one chromosome may appear anywhere in the file, and empty
     rows are skipped.
     """
     from intervaltree import IntervalTree
     from csv import reader
     self.trees = {}
     self.slop = slop
     # Collect intervals per chromosome first, so a chromosome that shows up
     # in several non-contiguous runs does not overwrite its earlier tree.
     ivls_by_chrm = {}
     with open(path, "rt") as bed:
         for row in reader(bed, delimiter="\t"):
             if not row:
                 # The csv reader yields [] for blank lines.
                 continue
             # NOTE(review): the +1 offsets presumably convert 0-based BED
             # coordinates to 1-based ones -- confirm against the callers.
             ivls_by_chrm.setdefault(row[0], []).append((
                 max(1, int(row[1]) - slop + 1),
                 int(row[2]) + slop + 1
             ))
     for chrm, ivls in ivls_by_chrm.items():
         self.trees[chrm] = IntervalTree.from_tuples(ivls)
예제 #53
0
def test_remove_overlap():
    """remove_overlap(point) drops every interval covering that point."""
    tree = IntervalTree.from_tuples(data.ivs1.data)

    for point in (1, 8):
        assert tree[point]
        tree.remove_overlap(point)
        assert not tree[point]
        tree.verify()
예제 #54
0
def test_split_overlap():
    """After split_overlaps(), overlapping intervals share identical bounds."""
    tree = IntervalTree.from_tuples(data.ivs1.data)

    tree.split_overlaps()
    tree.verify()

    # Drain the tree one interval at a time; whatever still overlaps the
    # removed interval must have exactly the same begin and end.
    while tree:
        picked = set(tree).pop()
        tree.remove(picked)
        for neighbour in tree.overlap(picked):
            assert neighbour.begin == picked.begin
            assert neighbour.end == picked.end
예제 #55
0
def tree():
    """Return a tree over ``data`` whose nodes are laid out by hand.

    The node structure is assembled manually, its printed form is checked
    against the expected layout, and it is then installed as the tree's top
    node. The tree is verified before being returned.
    """
    fixture = IntervalTree.from_tuples(data)

    # Root: Node<961, depth=2, balance=0> holding Interval(961, 986, 1)
    root = Node()
    root.x_center = 961
    root.s_center = {Interval(*data[7])}
    root.depth = 2
    root.balance = 0

    # Left child: Node<871, depth=1, balance=0> holding the four
    # Interval(860, 917, k) and the three Interval(871, 917, k) entries
    left = Node()
    root.left_node = left
    left.x_center = 871
    left.s_center = {Interval(*tup) for tup in data[:7]}
    left.depth = 1
    left.balance = 0

    # Right child: Node<1047, depth=1, balance=0> holding the two
    # Interval(1047, 1064, k) entries
    right = Node()
    root.right_node = right
    right.x_center = 1047
    right.s_center = {Interval(*tup) for tup in data[8:]}
    right.depth = 1
    right.balance = 0

    expected_layout = """\
Node<961, depth=2, balance=0>
 Interval(961, 986, 1)
<:  Node<871, depth=1, balance=0>
     Interval(860, 917, 1)
     Interval(860, 917, 2)
     Interval(860, 917, 3)
     Interval(860, 917, 4)
     Interval(871, 917, 1)
     Interval(871, 917, 2)
     Interval(871, 917, 3)
>:  Node<1047, depth=1, balance=0>
     Interval(1047, 1064, 1)
     Interval(1047, 1064, 2)
"""
    structure = root.print_structure(tostring=True)
    assert structure == expected_layout

    fixture.top_node = root
    fixture.verify()
    return fixture
예제 #56
0
def test_envelop_vs_overlap_queries():
    """envelop() and overlap() give different answers for the same range."""
    tree = IntervalTree.from_tuples(data.ivs1.data)

    # (query range, labels expected from envelop, labels expected from overlap)
    cases = [
        ((4, 5), set(), {'[4,7)'}),
        ((4, 6), set(), {'[4,7)', '[5,9)'}),
        ((6, 10),
         {'[6,10)', '[8,10)'},
         {'[4,7)', '[5,9)', '[6,10)', '[8,10)', '[8,15)'}),
        ((6, 11),
         {'[6,10)', '[8,10)'},
         {'[4,7)', '[5,9)', '[6,10)', '[8,10)', '[8,15)', '[10,12)'}),
    ]
    for (begin, end), enveloped, overlapped in cases:
        assert match.set_data(tree.envelop(begin, end)) == enveloped
        assert match.set_data(tree.overlap(begin, end)) == overlapped
예제 #57
0
def removeGenusZeroLinks(LinkData):
    """
    Remove links that are topologically equivalent to an enclosing link and
    so do not contribute to the genus. Crossing links are looked up with
    interval-tree queries instead of comparing every pair of links.

    Requires the IntervalTree package.

    A link is dropped when another link encloses it and the two gaps between
    their endpoints (enclosing start -> inner start, inner end -> enclosing
    end) are each overlapped by exactly one link.

    *Args*:
        LinkData:
            Nx2 links, rows (p1, p2)
    *Returns*:
        removal_linkData:
            The list of links retained, as [p1, p2] pairs
    """
    # Queries run against the untouched tree; removals go to its twin.
    org_tree = IntervalTree.from_tuples([tuple(link) for link in LinkData])
    removal_tree = IntervalTree.from_tuples([tuple(link) for link in LinkData])

    for inner in org_tree:
        for outer in org_tree[inner[0]:inner[1]]:
            encloses = inner[0] >= outer[0] and inner[1] <= outer[1]
            if not encloses or inner == outer:
                continue

            # Links overlapping the gaps on either side of the inner link.
            left_set = org_tree[outer[0]:inner[0]]
            right_set = org_tree[inner[1]:outer[1]]
            if len(right_set) == 1 and len(left_set) == 1:
                # Only one link overlaps each gap (the original comment
                # identifies it as the enclosing link itself).
                removal_tree.remove(inner)

    removal_linkData = [[kept[0], kept[1]] for kept in removal_tree]
    return removal_linkData
            
################################################################################
예제 #58
0
def test_merge_equals_reducer_with_initializer():
    def reducer(old, new):
        return old + [new]
    # empty tree
    e = IntervalTree()
    e.merge_equals(data_reducer=reducer, data_initializer=[])
    e.verify()
    assert not e

    # One Interval in tree, no change
    o = IntervalTree.from_tuples([(1, 2, 'hello')])
    o.merge_equals(data_reducer=reducer, data_initializer=[])
    o.verify()
    assert len(o) == 1
    assert sorted(o) == [Interval(1, 2, ['hello'])]

    # many Intervals in tree, no change
    t = IntervalTree.from_tuples(data.ivs1.data)
    orig = IntervalTree.from_tuples(data.ivs1.data)
    t.merge_equals(data_reducer=reducer, data_initializer=[])
    t.verify()
    assert len(t) == len(orig)
    assert t != orig
    assert sorted(t) == [Interval(b, e, [d]) for b, e, d in sorted(orig)]

    # many Intervals in tree, with change
    t = IntervalTree.from_tuples(data.ivs1.data)
    orig = IntervalTree.from_tuples(data.ivs1.data)
    t.addi(4, 7, 'foo')
    t.merge_equals(data_reducer=reducer, data_initializer=[])
    t.verify()
    assert len(t) == len(orig)
    assert t != orig
    assert not t.containsi(4, 7, 'foo')
    assert not t.containsi(4, 7, '[4,7)')
    assert t.containsi(4, 7, ['[4,7)', 'foo'])