Python IntervalTree.beginの例

プログラミング言語: Python

名前空間/パッケージ名: intervaltree

クラス/型: IntervalTree

メソッド/関数: begin

hotexamples.comのコード掲載数: 18

intervaltree.IntervalTree.beginはIntervalTreeオブジェクトのメソッドであり、ツリーに格納されているすべての区間のうち、最も小さい開始点（始点）を返します。ツリーが空の場合は0を返します。インターバルツリーは、範囲クエリの効率的な実行や重なり合う区間の検索に使用されるデータ構造です。beginメソッドはendメソッドと組み合わせることで、ツリー全体がカバーする範囲の開始点と終了点を調べるのに役立ちます。

Python IntervalTree.begin - 18件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのintervaltree.IntervalTree.beginの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

IntervalTree(30)

from_tuples(30)

overlap(30)

merge_overlaps(30)

search(30)

addi(30)

add(24)

chop(21)

end(16)

items(15)

at(14)

begin(14)

overlaps(14)

copy(10)

merge_equals(9)

removei(9)

clear(8)

envelop(8)

remove_overlap(6)

extend(6)

discard(4)

remove(3)

remove_envelop(3)

chop_intervals_that_envelope_range(3)

insert_interval(3)

is_empty(3)

discardi(2)

print_structure(2)

overlaps_point(2)

append(2)

add_interval(2)

merge_neighbors(2)

range(1)

pop(1)

score(1)

after(1)

find_nested(1)

iter(1)

intersection(1)

intersect(1)

insert(1)

after_interval(1)

find(1)

__iter__(1)

downstream_of_interval(1)

difference_update(1)

containsi(1)

computeUtilizationHistogram(1)

computeCountHistogram(1)

before_interval(1)

コード例 #1

ファイルを表示

def interval_tree(start_data, stop_data, buffer_len):
    """Build consensus intervals from per-key start/stop lists.

    Every input interval ``[start, stop + 1)`` is shrunk by
    ``buffer_len / 2`` on each side and stored in an ``IntervalTree``.  The
    extremes reported by ``get_extremes`` are then attached without a
    buffer, overlapping intervals are merged, and any original interval that
    still touches two consensus intervals has its smaller overlap chopped
    out of the tree.

    Args:
        start_data: mapping of key -> sequence of interval start positions.
        stop_data: mapping of key -> sequence of stop positions, index-aligned
            with ``start_data[key]`` (1 is added to each stop before use).
        buffer_len: total buffer size; half of it is trimmed from each end.

    Returns:
        A ``(starts, stops)`` tuple of lists holding the begin and end of
        every consensus interval, in sorted order.
    """
    starts = []
    stops = []
    t = IntervalTree()

    # Shrink each interval by the buffer size.
    # ``items()`` replaces the Python-2-only ``iteritems()`` so the function
    # runs on both Python 2 and Python 3.
    half_buffer = buffer_len / 2.0
    for key, value in start_data.items():
        for i, raw_start in enumerate(value):
            shrunk_start = raw_start + half_buffer
            shrunk_stop = stop_data[key][i] + 1 - half_buffer
            # Intervals shorter than the buffer vanish entirely.
            if shrunk_start < shrunk_stop:
                t[shrunk_start:shrunk_stop] = (shrunk_start, shrunk_stop)

    # Add chromosome endpoints without buffer.
    chrom_start, chrom_stop = get_extremes(start_data, stop_data)
    head_end = t.begin() + 1
    if chrom_start < head_end:
        t[chrom_start:head_end] = (chrom_start, head_end)
    # Must be read after the insertion above: it can move the tree's end.
    tail_begin = t.end() - 1
    if tail_begin < chrom_stop:
        t[tail_begin:chrom_stop] = (tail_begin, chrom_stop)

    # Merge intervals that overlap in tree to get consensus.
    t.merge_overlaps()

    # Check that original intervals only overlap with one consensus interval.
    for key, value in start_data.items():
        for i, start in enumerate(value):
            stop = stop_data[key][i] + 1
            overlapping = t[start:stop]
            if len(overlapping) > 1:
                # If they overlap with more than one, remove part of a
                # consensus interval.  This will never be more than the
                # buffer size / 2.
                assert (len(overlapping) == 2)
                remove_start = 0
                remove_stop = 0
                min_length = float('inf')
                # Pick the smaller of the two overlaps as the piece to drop.
                for interval in overlapping:
                    overlap_start, overlap_stop = get_overlap(
                        (start, stop), (interval[0], interval[1]))
                    if (overlap_stop - overlap_start) < min_length:
                        min_length = overlap_stop - overlap_start
                        remove_start = overlap_start
                        remove_stop = overlap_stop
                print(min_length)
                t.chop(remove_start, remove_stop)
                assert (min_length <= half_buffer)
                # Re-query: the tree changed, so the cached set is stale.
                assert (len(t[start:stop]) < 2)

    # Get consensus start and stop points.
    chrom_len = chrom_stop - chrom_start
    covered = 0.0
    for interval in sorted(t):
        starts.append(interval[0])
        stops.append(interval[1])
        covered = covered + (interval[1] - interval[0])

    print("The percentage of the chromosome covered is: %s" % '{0:.2f}'.format(
        (covered / chrom_len) * 100.0))

    return (starts, stops)

コード例 #2

ファイルを表示

ファイル: intervaltree_test.py プロジェクト: Artimi/intervaltree

def test_empty_queries():
    """Every query against a freshly created tree yields an empty result."""
    tree = IntervalTree()
    nothing = set()

    # Size and emptiness.
    assert len(tree) == 0
    assert tree.is_empty()

    # Point and range lookups.
    assert tree[3] == nothing
    assert tree[4:6] == nothing

    # An empty tree reports 0 for both bounds.
    assert tree.begin() == 0
    assert tree.end() == 0
    assert tree[tree.begin():tree.end()] == nothing

    # Whole-tree views.
    assert tree.items() == nothing
    assert set(tree) == nothing
    assert set(tree.copy()) == nothing
    assert tree.find_nested() == {}
    tree.verify()

コード例 #3

ファイルを表示

ファイル: init_test.py プロジェクト: mjpieters/intervaltree

def test_list_init():
    """A tree built from a list contains exactly the listed intervals."""
    source = [Interval(-10, 10), Interval(-20.0, -10.0)]
    tree = IntervalTree(source)
    tree.verify()

    assert tree
    assert len(tree) == 2
    assert tree.items() == {Interval(-10, 10), Interval(-20.0, -10.0)}

    # The bounds span both intervals.
    assert tree.begin() == -20
    assert tree.end() == 10

コード例 #4

ファイルを表示

ファイル: init_test.py プロジェクト: ProgVal/intervaltree

def test_list_init():
    """Initialising from a list keeps every interval and the right bounds."""
    intervals = [Interval(-10, 10), Interval(-20.0, -10.0)]
    tree = IntervalTree(intervals)
    tree.verify()

    assert tree
    assert len(tree) == 2

    expected = {Interval(-10, 10), Interval(-20.0, -10.0)}
    assert tree.items() == expected
    assert tree.begin() == -20
    assert tree.end() == 10

コード例 #5

ファイルを表示

ファイル: query_test.py プロジェクト: lcoombe/intervaltree

def test_empty_queries():
    """Queries, views and the overall range of an empty tree are all empty."""
    tree = IntervalTree()
    nothing = set()

    # Size and emptiness.
    assert len(tree) == 0
    assert tree.is_empty()

    # Point and range lookups.
    assert tree[3] == nothing
    assert tree[4:6] == nothing

    # An empty tree reports 0 for both bounds.
    assert tree.begin() == 0
    assert tree.end() == 0
    assert tree[tree.begin():tree.end()] == nothing

    # Whole-tree views.
    assert tree.items() == nothing
    assert set(tree) == nothing
    assert set(tree.copy()) == nothing
    assert tree.find_nested() == {}

    # The covering range is the null interval.
    assert tree.range().is_null()
    assert tree.range().length() == 0
    tree.verify()

コード例 #6

ファイルを表示

ファイル: query_test.py プロジェクト: chaimleib/intervaltree

def test_empty_queries():
    """An empty tree answers every kind of query with an empty result."""
    tree = IntervalTree()
    nothing = set()

    # Size and emptiness.
    assert len(tree) == 0
    assert tree.is_empty()

    # Point and range lookups.
    assert tree[3] == nothing
    assert tree[4:6] == nothing

    # An empty tree reports 0 for both bounds.
    assert tree.begin() == 0
    assert tree.end() == 0

    # Slice, overlap and envelop queries over the (empty) full span.
    assert tree[tree.begin():tree.end()] == nothing
    assert tree.overlap(tree.begin(), tree.end()) == nothing
    assert tree.envelop(tree.begin(), tree.end()) == nothing

    # Whole-tree views.
    assert tree.items() == nothing
    assert set(tree) == nothing
    assert set(tree.copy()) == nothing
    assert tree.find_nested() == {}

    # The covering range is the null interval.
    assert tree.range().is_null()
    assert tree.range().length() == 0
    tree.verify()

コード例 #7

ファイルを表示

ファイル: brat.py プロジェクト: bionlplab/bioc

class BratEntity(BratAnnotation):
    """
    A text-bound brat annotation: an ID, a type, and one or more character
    spans together with the text they cover.

    In standoff format one entity occupies one line, written as
    "`ID [tab] TYPE START END [tab] TEXT`", for example

    ::

        T1  Organization 0 4  Sony
        T3  Organization 33 41  Ericsson
        T4  Country 75 81 Sweden

    START and END are positive integer character offsets and `TEXT` is the
    text of the mention. A discontinuous mention lists several offset pairs
    separated by semicolons:
    "`ID [tab] TYPE START END[;START END]* [tab] TEXT`".

    The spans are kept in ``locations``, an ``IntervalTree``.
    """
    def __init__(self):
        super(BratEntity, self).__init__()
        self.text = None  # type: Optional[str]
        self.locations = IntervalTree()  # type: IntervalTree

    @property
    def total_span(self) -> Tuple[int, int]:
        """The ``(begin, end)`` bounds covering every span of this entity."""
        spans = self.locations
        return spans.begin(), spans.end()

    def add_span(self, start: int, end: int, data=None):
        """Record the span ``[start, end)`` with optional payload ``data``."""
        self.locations[start: end] = data

    def shift(self, offset: int):
        """Return a new entity whose spans are all moved by ``offset``.

        ``id``, ``type`` and ``text`` are carried over unchanged; this entity
        is not modified.
        """
        moved = BratEntity()
        moved.id, moved.type, moved.text = self.id, self.type, self.text
        for span in self.locations:
            new_begin = span.begin + offset
            new_end = span.end + offset
            moved.locations[new_begin: new_end] = span.data
        return moved

    def __eq__(self, other):
        # Anything that is not a BratEntity is simply unequal.
        if not isinstance(other, BratEntity):
            return False
        return (self.id == other.id
                and self.type == other.type
                and self.text == other.text
                and self.locations == other.locations)

    def __str__(self):
        fields = (self.id, self.type, self.text, self.locations)
        return 'BratEntity[id=%s,type=%s,text=%s,loc=%s]' % fields

コード例 #8

ファイルを表示

ファイル: init_test.py プロジェクト: mjpieters/intervaltree

def test_generator_init():
    """A tree can be initialised from a generator of intervals."""
    bounds = [(-10, 10), (-20, -10), (10, 20)]
    tree = IntervalTree(Interval(lo, hi) for lo, hi in bounds)
    tree.verify()

    assert tree
    assert len(tree) == 3

    expected = {
        Interval(-20, -10),
        Interval(-10, 10),
        Interval(10, 20),
    }
    assert tree.items() == expected

    # The bounds span all three intervals.
    assert tree.begin() == -20
    assert tree.end() == 20

コード例 #9

ファイルを表示

ファイル: init_test.py プロジェクト: ProgVal/intervaltree

def test_generator_init():
    """Building a tree from a generator keeps every yielded interval."""
    pairs = [(-10, 10), (-20, -10), (10, 20)]
    generated = (Interval(first, last) for first, last in pairs)
    tree = IntervalTree(generated)
    tree.verify()

    assert tree
    assert len(tree) == 3
    assert tree.items() == {
        Interval(-20, -10),
        Interval(-10, 10),
        Interval(10, 20),
    }
    assert tree.begin() == -20
    assert tree.end() == 20

コード例 #10

ファイルを表示

ファイル: simple_chop_coverage.py プロジェクト: tomokveld/CHOP

def gen_interval_tree(interval):
    """Build an interval tree queryable by points and ranges.

    The text after the first ``'|'`` of ``interval`` (or the whole string when
    there is none) is split on ``'_'``.  Each piece is split on ``'+'`` or
    ``','`` into three integers ``(node, start, end)``.  Pieces are laid out
    one after another by a running offset::

        [0, n] -> (node, offset)
            Node        Node        Node
        |---------|--------------|----------|

    Returns:
        ``(tree, tree.begin())`` where every stored interval carries the data
        tuple ``(node, offset)``.
    """
    tree = IntervalTree()
    running_offset = 0

    segments = interval.split('|', 1)[-1]
    for segment in segments.split('_'):
        node, start, end = (int(field) for field in re.split('[+,]', segment))

        shifted_start = start + running_offset
        shifted_end = end + running_offset
        tree[shifted_start:shifted_end] = (node, running_offset)

        # The next piece begins where this one ended.
        running_offset += end

    return (tree, tree.begin())

コード例 #11

ファイルを表示

ファイル: smoothing.py プロジェクト: guoqi123/oncodriveCLUST

def smooth_nucleotide(regions, concat_regions_d, mutations, tukey_filter,
                      simulation_window):
    """Generate a smoothing curve for an element's mutations in the nucleotide sequence.

    Every region gets a zero-initialised score array padded on both sides;
    ``tukey_filter`` is added into that array around each mutation, and the
    padding is sliced off again before the result is returned.  When
    ``concat_regions_d`` is truthy the regions are additionally concatenated
    into one array and mutations inside regions are smoothed on that
    concatenated sequence.

    Args:
        regions (IntervalTree): IntervalTree with genomic positions of an element
        concat_regions_d (dict): keys are start genomic regions; the code reads
            the ``.start`` attribute of each value as the index of that region
            in the concatenated sequence. A falsy value selects the
            non-concatenated mode.
        mutations (list): list of mutations formatted as namedtuple; the fields
            read here are ``position`` and ``region`` (``region[0]`` is used as
            a lookup position).
        tukey_filter (numpy.ndarray): kde array, length equals smoothing window.
        simulation_window (int): simulation window

    Returns:
        final_smooth_tree (IntervalTree): interval are genomic regions or indexes (concatenate mode),
            data np.array of smoothing score by position
        mutations_in (list): list of mutations in regions
    """
    first_smooth_tree = IntervalTree()
    final_smooth_tree = IntervalTree()
    mutations_in = []

    # Generate smoothing arrays for regions
    for interval in regions:
        # Add extra bases for smoothing of simulated mutations that fall outside regions and tukey_filter
        # (array length = region length + len(tukey_filter) + simulation_window - 2)
        first_smooth_tree.addi(
            interval.begin, interval.end,
            np.zeros((interval.end - interval.begin + len(tukey_filter) +
                      simulation_window - 2)))

    if not concat_regions_d:
        # Smooth
        for mutation in mutations:
            for interval in first_smooth_tree[mutation.region[0]]:
                # Get index of mutation in region
                # new_begin is the genomic position that maps to array index 0,
                # i.e. the region start minus the left padding.
                new_begin = interval.begin - (simulation_window +
                                              len(tukey_filter) -
                                              2) // 2  # always integer
                index = mutation.position - new_begin
                # Shift left by half the filter so the filter is centred on the mutation.
                tukey_begin = index - (len(tukey_filter) - 1) // 2
                # Smooth mutations
                interval.data[tukey_begin:tukey_begin +
                              len(tukey_filter)] += tukey_filter
            # Get mutations inside regions
            if regions[mutation.position]:
                mutations_in.append(mutation)

        # Remove extra bp
        for interval in first_smooth_tree:
            begin = interval.begin
            end = interval.end
            slicer = (simulation_window + len(tukey_filter) - 2) // 2
            # NOTE(review): if slicer is 0, ``data[0:-0]`` is an empty slice,
            # so the whole array would be dropped - confirm inputs rule this out.
            final_smooth_tree.addi(begin, end, interval.data[slicer:-slicer])

    else:
        # Smooth simulated mutations outside regions
        for mutation in mutations:
            # Only mutations whose position is NOT covered by a region are handled here.
            if not first_smooth_tree[mutation.position]:
                for interval in first_smooth_tree[mutation.region[0]]:
                    # Same index arithmetic as in the non-concatenated branch.
                    new_begin = interval.begin - (simulation_window +
                                                  len(tukey_filter) -
                                                  2) // 2  # always integer
                    index = mutation.position - new_begin
                    tukey_begin = index - (len(tukey_filter) - 1) // 2
                    # Smooth mutations
                    interval.data[tukey_begin:tukey_begin +
                                  len(tukey_filter)] += tukey_filter

        # Remove extra bp
        for interval in first_smooth_tree:
            begin = interval.begin
            end = interval.end
            slicer = (simulation_window + len(tukey_filter) - 2) // 2
            # NOTE(review): same zero-width hazard as above when slicer == 0.
            final_smooth_tree.addi(begin, end, interval.data[slicer:-slicer])

        # Merge sorted regions (one interval == concatenated sequence) and add tukey//2 to both ends
        concat_tree = IntervalTree()
        concat_array = np.zeros((len(tukey_filter) - 1) // 2)
        for interval in sorted(final_smooth_tree):
            concat_array = np.append(concat_array, interval.data)
        concat_array = np.append(concat_array,
                                 np.zeros((len(tukey_filter) - 1) // 2))
        # A single interval spanning the outermost bounds carries the concatenated array.
        concat_tree.addi(final_smooth_tree.begin(), final_smooth_tree.end(),
                         concat_array)
        # Start over: the final tree is rebuilt from concat_tree below.
        final_smooth_tree = IntervalTree()

        # Smooth mutations inside regions
        for mutation in mutations:
            if first_smooth_tree[mutation.position]:
                for interval in concat_tree[mutation.position]:
                    # Get index of mutation in concatenated sequence
                    index = (mutation.position - mutation.region[0]
                             ) + concat_regions_d[mutation.region[0]].start
                    # Smooth mutations
                    # No explicit centring here: the leading zero padding of
                    # concat_array has the same width as the filter's left half.
                    interval.data[index:(index +
                                         len(tukey_filter))] += tukey_filter
                mutations_in.append(mutation)

        # Remove extra bp
        for interval in concat_tree:
            begin = interval.begin
            end = interval.end
            slicer = (len(tukey_filter) - 1) // 2
            # NOTE(review): slicer is 0 when len(tukey_filter) is 1 or 2, which
            # would make this slice empty - confirm the filter is always longer.
            final_smooth_tree.addi(begin, end, interval.data[slicer:-slicer])

    return final_smooth_tree, mutations_in

コード例 #12

ファイルを表示

    class CoordinateTranslator(object):
        class Leaf(object):
            """Tree payload pairing a feature with its coding-coordinate span.

            ``start``/``stop`` are copied from the feature; ``coding_start``
            and ``coding_stop`` are supplied by the caller.
            """

            def __init__(self, feature, coding_start, coding_stop):
                self.feature = feature
                # Genomic bounds come straight from the wrapped feature.
                self.start, self.stop = feature.start, feature.stop
                self.coding_start, self.coding_stop = coding_start, coding_stop

            def __str__(self):
                bounds = (self.start, self.stop,
                          self.coding_start, self.coding_stop)
                return 'genomic: [%s, %s], coding: [%s, %s]' % bounds

        def __init__(self, exons, introns, strand, coding_offset,
                     coding_length):
            """Index exons and introns for genomic <-> coding translation.

            Three interval trees are populated:

            * ``_genomic_tree`` - exon leaves keyed by genomic
              ``[start, stop + 1)``;
            * ``_exon_tree`` - the same exon leaves keyed by coding
              ``[coding_start, coding_stop + 1)``;
            * ``_intron_tree`` - intron leaves keyed by genomic
              ``[start, stop + 1)``, plus two "straw" introns covering the
              genomic space before and after the exon span.

            Args:
                exons: sequence of exon features exposing ``start``, ``stop``
                    and ``length``; walked in reverse order unless ``strand``
                    is ``'+'``.
                introns: iterable of intron features exposing ``start`` and
                    ``stop``; the positions just outside each intron must be
                    covered by an exon.
                strand: ``'+'`` selects forward-strand handling; any other
                    value takes the reverse-strand path.
                coding_offset: the first exon processed starts at coding
                    coordinate ``-coding_offset``.
                coding_length: used as the coding anchor of the straw intron
                    that follows the last coding position.
            """
            self.strand = strand
            self.coding_offset = coding_offset
            self.coding_length = coding_length
            self._exon_tree = IntervalTree()
            self._intron_tree = IntervalTree()
            self._genomic_tree = IntervalTree()

            leaf_cls = Transcript.CoordinateTranslator.Leaf
            forward = self.strand == '+'

            _coding_start = -self.coding_offset

            for exon in (exons if forward else exons[::-1]):
                leaf = leaf_cls(
                    exon, _coding_start, _coding_start + exon.length - 1)

                self._genomic_tree.addi(leaf.start, leaf.stop + 1, leaf)
                self._exon_tree.addi(leaf.coding_start, leaf.coding_stop + 1,
                                     leaf)

                # increment
                _coding_start = leaf.coding_stop + 1

            for intron in introns:
                # introns don't have coding coordinates, so use those of
                # adjacent exons
                leaf_genomic_upstream = \
                    list(self._genomic_tree[intron.start - 1])[0].data
                leaf_genomic_downstream = \
                    list(self._genomic_tree[intron.stop + 1])[0].data

                # NOTE: always assemble intronic offsets w.r.t. to the
                #  'coding stop' position of the upstream CDS
                if forward:
                    leaf = leaf_cls(
                        intron,
                        leaf_genomic_upstream.coding_stop,
                        leaf_genomic_downstream.coding_start
                    )
                else:
                    leaf = leaf_cls(
                        intron,
                        leaf_genomic_downstream.coding_stop,
                        leaf_genomic_upstream.coding_start
                    )
                self._intron_tree.addi(leaf.start, leaf.stop + 1, leaf)

            # add introns that are upstream and downstream to the exon
            #  sequence
            # TODO: we may not need this, depending on how we choose to handle
            #  [start, stop] ranges that occur outside exon ranges

            # ``sys.maxint`` was removed in Python 3; keep its value where it
            # exists and fall back to ``sys.maxsize`` everywhere else.
            genomic_max = getattr(sys, 'maxint', sys.maxsize)

            # Coding anchors for the straw introns.  Which genomic side gets
            # which anchor depends only on the strand.
            before_coding = (-1, 0)
            after_coding = (self.coding_length - 1, self.coding_length)
            if forward:
                upstream_coding, downstream_coding = \
                    before_coding, after_coding
            else:
                upstream_coding, downstream_coding = \
                    after_coding, before_coding

            # straw upstream (genomic) intron
            straw0 = Feature(
                '.', 0, self._genomic_tree.begin() - 1, self.strand, None)
            leaf0 = leaf_cls(straw0, upstream_coding[0], upstream_coding[1])
            self._intron_tree.addi(straw0.start, straw0.stop, leaf0)

            # straw downstream (genomic) intron
            straw1 = Feature(
                '.', self._genomic_tree.end() + 1, genomic_max, self.strand,
                None)
            leaf1 = leaf_cls(
                straw1, downstream_coding[0], downstream_coding[1])
            self._intron_tree.addi(straw1.start, straw1.stop, leaf1)

        def to_coding_range(self, start, stop, hgvs_format=False):
            """Translate a genomic ``[start, stop]`` pair to coding coordinates.

            Each endpoint is looked up in the exon (``_genomic_tree``) and
            intron (``_intron_tree``) trees.  Exon hits are converted by
            offsetting within the exon; intron hits are reported as one of the
            intron leaf's coding anchors plus an intronic offset.

            Args:
                start: genomic start position.
                stop: genomic stop position.
                hgvs_format: when true, an intronic position whose ``delta0``
                    exceeds ``delta1`` is anchored to ``leaf.coding_stop`` with
                    a negative offset (``-delta1``) instead of to
                    ``leaf.coding_start`` with a positive one (``delta0``).

            Returns:
                ``Transcript.CodingRange(coding_start, coding_stop,
                intron_coding_offset_start, intron_coding_offset_stop)``.

            NOTE(review): if no leaf covers ``start`` (or ``stop``) the lookup
            yields ``None`` and the following ``.data`` access raises
            ``AttributeError``.
            """
            #  from above, introns have a coding_length == 1
            # TODO: set 'intron' attribute on leaves in '_intron_tree'
            #  above
            # NOTE(review): an exon leaf with coding_stop - coding_start == 1
            #  (a 2-base exon, given how __init__ builds exon leaves) would
            #  also be classified as an intron here - confirm this is safe.
            def _is_intron(leaf):
                return leaf.coding_stop - leaf.coding_start == 1

            # coding start
            # Take the first leaf found in either tree; None if nothing matches.
            range_coding_start = (list(self._genomic_tree[start]
                                       | self._intron_tree[start])
                                  or [None])[0]

            coding_start = None
            intron_coding_offset_start = 0
            leaf = range_coding_start.data
            if _is_intron(leaf):
                if self.strand == '+':
                    # delta0/delta1: distances from `start` to the two intron edges.
                    delta0 = start - leaf.start + 1
                    delta1 = leaf.stop + 1 - start
                    if hgvs_format and delta0 > delta1:
                        coding_start = leaf.coding_stop
                        intron_coding_offset_start = -delta1
                    else:
                        coding_start = leaf.coding_start
                        intron_coding_offset_start = delta0

                else:
                    # NOTE(review): the reverse-strand branch measures from
                    #  `stop` although the leaf was looked up with `start` -
                    #  presumably intentional strand mirroring; verify.
                    delta0 = leaf.stop + 1 - stop
                    delta1 = stop - leaf.start + 1
                    if hgvs_format and delta0 > delta1:
                        coding_start = leaf.coding_stop
                        intron_coding_offset_start = -delta1
                    else:
                        coding_start = leaf.coding_start
                        intron_coding_offset_start = delta0
            else:
                # Exon hit: offset within the exon, measured from the strand's 5' side.
                if self.strand == '+':
                    coding_start = \
                        leaf.coding_start + (start - leaf.start)
                else:
                    coding_start = \
                        leaf.coding_start + (leaf.stop - stop)

            # coding stop
            # Same lookup as above, this time for the `stop` position.
            range_coding_stop = (list(self._genomic_tree[stop]
                                      | self._intron_tree[stop]) or [None])[0]

            coding_stop = None
            intron_coding_offset_stop = 0
            leaf = range_coding_stop.data
            if _is_intron(leaf):
                if self.strand == '+':
                    delta0 = stop - leaf.start + 1
                    delta1 = leaf.stop + 1 - stop
                    if hgvs_format and delta0 > delta1:
                        coding_stop = leaf.coding_stop
                        intron_coding_offset_stop = -delta1
                    else:
                        coding_stop = leaf.coding_start
                        intron_coding_offset_stop = delta0

                else:
                    # NOTE(review): mirrors the start case - measures from
                    #  `start` although the leaf was looked up with `stop`.
                    delta0 = leaf.stop + 1 - start
                    delta1 = start - leaf.start + 1
                    if hgvs_format and delta0 > delta1:
                        coding_stop = leaf.coding_stop
                        intron_coding_offset_stop = -delta1
                    else:
                        coding_stop = leaf.coding_start
                        intron_coding_offset_stop = delta0

            else:
                # Exon hit: back off from the exon's coding stop by the unused tail.
                if self.strand == '+':
                    coding_stop = \
                        leaf.coding_stop - (leaf.stop - stop)
                else:
                    coding_stop = \
                        leaf.coding_stop - (start - leaf.start)

            return \
                Transcript.CodingRange(
                    coding_start,
                    coding_stop,
                    intron_coding_offset_start,
                    intron_coding_offset_stop
                )

        def to_genomic_ranges(self, coding_start, coding_stop):
            """Map the coding span ``[coding_start, coding_stop]`` to genomic ranges.

            Every exon overlapping the span contributes one
            ``Transcript.GenomicRange``, clipped to the part of the exon that
            lies inside the span.  Exons are visited in ascending order, or in
            descending order when ``strand`` is ``'-'``.
            """
            descending = self.strand == '-'
            hits = sorted(self._exon_tree[coding_start:coding_stop + 1],
                          reverse=descending)
            leaves = [hit.data for hit in hits]

            genomic_ranges = []
            for leaf in leaves:
                # How far the requested span starts after / ends before this
                # exon's own coding bounds (never negative).
                head_trim = max(coding_start - leaf.coding_start, 0)
                tail_trim = max(leaf.coding_stop - coding_stop, 0)

                if self.strand == '+':
                    low = leaf.start + head_trim
                    high = leaf.stop - tail_trim
                else:
                    # Coding order runs against genomic order here, so the
                    # trims swap sides.
                    low = leaf.start + tail_trim
                    high = leaf.stop - head_trim

                genomic_ranges.append(Transcript.GenomicRange(low, high))

            return genomic_ranges

        def __str__(self):
            return 'coding sequences: %s' % map(str, self._tree)

コード例 #13

ファイルを表示

ファイル: getFusionFromVCF.py プロジェクト: szilvajuhos/btb-scripts

class ExonCoords:
    """Exons of one gene together with the breakpoint being examined.

    Holds the chromosome, strand, breakpoint, gene name and an
    ``IntervalTree`` of exons.  Every field is exposed as a read/write
    property backed by an underscore-prefixed attribute.
    """

    def __init__(self, chromosome, strand, breakpoint, gene_name,
                 exons: IntervalTree):
        self.chromosome = chromosome
        self.strand = strand
        self.breakpoint = breakpoint
        self.gene_name = gene_name
        # A new tree is built from the argument rather than stored as given.
        self.exons = IntervalTree(exons)

    # ---- alternate constructors -------------------------------------

    @classmethod
    def fromTuple(cls, a_tuple):
        """Build an instance from the first five items of ``a_tuple``."""
        fields = (a_tuple[index] for index in range(5))
        return cls(*fields)

    @classmethod
    def copy_without_exons(cls, exc):
        """Copy every field of ``exc`` but start with an empty exon tree."""
        no_exons = IntervalTree()
        return cls(exc.chromosome, exc.strand, exc.breakpoint, exc.gene_name,
                   no_exons)

    @classmethod
    def empty(cls):
        """Return a placeholder instance with blank fields and no exons."""
        return cls("", 0, -1, "", IntervalTree())

    # ---- reporting ---------------------------------------------------

    def print_properties(self):
        """Print a human-readable summary of all fields to stdout."""
        span = (self.chromosome + ":" + str(self.exons.begin()) +
                "-" + str(self.exons.end()))
        print("#########################################")
        print("coordinates :", span)
        print("gene        :", self.gene_name)
        print("strand      :", self._strand)
        print("breakpoint  :", self._breakpoint)
        print("exons       :", self._exons)
        print("#########################################")

    def print_as_bed(self):
        """Print one tab-separated ``chromosome begin end`` line per exon."""
        chrom = self.chromosome
        for exon in sorted(self.exons):
            print(chrom + "\t" + str(exon.begin) + "\t" + str(exon.end))

    def begin(self):
        """Return the begin of the exon tree."""
        return self.exons.begin()

    # ---- properties --------------------------------------------------

    @property
    def gene_name(self):
        return self._gene_name

    @gene_name.setter
    def gene_name(self, value):
        self._gene_name = value

    @property
    def chromosome(self):
        return self._chromosome

    @chromosome.setter
    def chromosome(self, value):
        self._chromosome = value

    @property
    def strand(self):
        return self._strand

    @strand.setter
    def strand(self, value):
        self._strand = value

    @property
    def breakpoint(self):  # int
        return self._breakpoint

    @breakpoint.setter
    def breakpoint(self, value):
        self._breakpoint = value

    @property
    def exons(self):  # IntervalTree()
        return self._exons

    @exons.setter
    def exons(self, exons):
        self._exons = exons

コード例 #14

ファイルを表示

class IntervalGraph(object):
    """Base class for undirected interval graphs.

    The IntervalGraph class allows any hashable object as a node
    and can associate key/value attribute pairs with each undirected edge.

    Each edge must have two integers, begin and end for its interval.

    Self-loops are allowed but multiple edges
    (two or more edges with the same nodes, begin and end interval) are not.

    Two nodes can have more than one edge with different overlapping or non-overlapping intervals.

    Parameters
    ----------
    attr : keyword arguments, optional (default= no attributes)
        Attributes to add to graph as key=value pairs.

    Examples
    --------
    Create an empty graph structure (a "null interval graph") with no nodes and
    no edges.

    >>> G = dnx.IntervalGraph()

    G can be grown in several ways.

    **Nodes:**

    Add one node at a time:

    >>> G.add_node(1)

    Add the nodes from any container (a list, dict, set or
    even the lines from a file or the nodes from another graph).

    Add the nodes from any container (a list, dict, set)

    >>> G.add_nodes_from([2, 3])
    >>> G.add_nodes_from(range(100, 110))

    **Edges:**

    G can also be grown by adding edges. This can be considered
    the primary way to grow G, since nodes with no edge will not
    appear in G in most cases. See ``G.to_snapshot()``.

    Add one edge, which starts at 0 and ends at 10.
    Keep in mind that the interval is [0, 10).
    Thus, it does not include the end.

    >>> G.add_edge(1, 2, 0, 10)

    a list of edges,

    >>> G.add_edges_from([(1, 2, 0, 10), (1, 3, 3, 11)])

    If some edges connect nodes not yet in the graph, the nodes
    are added automatically. There are no errors when adding
    nodes or edges that already exist.

    **Attributes:**

    Each interval graph, node, and edge can hold key/value attribute pairs
    in an associated attribute dictionary (the keys must be hashable).
    By default these are empty, but can be added or changed using
    add_edge, add_node.

    Keep in mind that the edge interval is not an attribute of the edge.

    >>> G = dnx.IntervalGraph(day="Friday")
    >>> G.graph
    {'day': 'Friday'}

    Add node attributes using add_node(), add_nodes_from()

    >>> G.add_node(1, time='5pm')
    >>> G.add_nodes_from([3], time='2pm')

    Add edge attributes using add_edge(), add_edges_from().

    >>> G.add_edge(1, 2, 0, 10, weight=4.7 )
    >>> G.add_edges_from([(3, 4, 3, 11), (4, 5, 0, 33)], color='red')

    **Shortcuts:**

    Here are a couple examples of available shortcuts:

    >>> 1 in G  # check if node in interval graph during any interval
    True
    >>> len(G)  # number of nodes in the entire interval graph
    5

    **Subclasses (Advanced):**
    Edges in interval graphs are represented by Interval Objects and are kept
    in an IntervalTree. Both are based on
    intervaltree available in pypi (https://pypi.org/project/intervaltree).
    IntervalTree allows for fast interval based search through edges,
    which makes interval graph analyses possible.

    The Graph class uses a dict-of-dict-of-dict data structure.
    The outer dict (node_dict) holds adjacency information keyed by node.
    The next dict (adjlist_dict) represents the adjacency information and holds
    edge data keyed by interval object.  The inner dict (edge_attr_dict) represents
    the edge data and holds edge attribute values keyed by attribute names.
    """
    def __init__(self, **attr):
        """Create an empty interval graph, optionally with graph attributes.

        Parameters
        ----------
        attr : keyword arguments, optional (default= no attributes)
            Graph-level attributes, stored as key=value pairs in ``graph``.

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G = dnx.IntervalGraph(name='my graph')
        >>> G.graph
        {'name': 'my graph'}
        """
        # Edge intervals live in the tree; nodes and adjacency in plain dicts.
        self.tree = IntervalTree()
        self.graph = dict(attr)  # dictionary for graph attributes
        self._adj = {}
        self._node = {}

    @property
    def name(self):
        """String identifier of the interval graph.

        This interval graph attribute appears in the attribute dict IG.graph
        keyed by the string `"name"`. as well as an attribute (technically
        a property) `IG.name`. This is entirely user controlled.
        """
        return self.graph.get('name', '')

    @name.setter
    def name(self, s):
        self.graph['name'] = s

    def __str__(self):
        """Return the interval graph name.

        Returns
        -------
        name : string
            The name of the interval graph.

        Examples
        --------
        >>> G = dnx.IntervalGraph(name='foo')
        >>> str(G)
        'foo'
        """
        return self.name

    def __len__(self):
        """Return the number of nodes. Use: 'len(G)'.

        Returns
        -------
        nnodes : int
            The number of nodes in the graph.

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_nodes_from([2, 4, 5])
        >>> len(G)
        3

        """
        return len(self._node)

    def __contains__(self, n):
        """Return True if n is a node, False otherwise. Use: 'n in G'.

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_node(2)
        >>> 2 in G
        True
        """
        try:
            return n in self._node
        except TypeError:
            return False

    def interval(self):
        """Return the (begin, end) interval spanned by the whole interval graph.

        The values are taken from the underlying interval tree.
        Note that end is non-inclusive.

        Returns
        -------
        (begin, end) : 2-tuple
            Smallest begin and largest end over all edges.

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 0, 10), (3, 7, 9, 16)])
        >>> G.interval()
        (0, 16)
        """
        tree = self.tree
        earliest = tree.begin()
        latest = tree.end()
        return earliest, latest

    def add_node(self, node_for_adding, **attr):
        """Add a single node `node_for_adding`  and update node attributes.

        Parameters
        ----------
        node_for_adding : node
            A node can be any hashable Python object except None.
        attr : keyword arguments, optional
            Set or change node attributes using key=value.

        See Also
        --------
        add_nodes_from

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_node(1)
        >>> G.add_node('Hello')
        >>> G.number_of_nodes()
        2

        Use keywords set/change node attributes:

        >>> G.add_node(1, size=10)
        >>> G.add_node(3, weight=0.4, UTM=('13S', 382871, 3972649))

        Notes
        -----
        A hashable object is one that can be used as a key in a Python
        dictionary. This includes strings, numbers, tuples of strings
        and numbers, etc.

        On many platforms hashable items also include mutables such as
        NetworkX Graphs, though one should be careful that the hash
        doesn't change on mutables.
        """
        if node_for_adding not in self._node:
            self._adj[node_for_adding] = {}
            self._node[node_for_adding] = attr
        else:  # update attr even if node already exists
            self._node[node_for_adding].update(attr)

    def add_nodes_from(self, nodes_for_adding, **attr):
        """Add multiple nodes.

        Parameters
        ----------
        nodes_for_adding : iterable container
            A container of nodes (list, dict, set, etc.).
            OR
            A container of (node, attribute dict) tuples.
            Node attributes are updated using the attribute dict.
        attr : keyword arguments, optional (default= no attributes)
            Update attributes for all nodes in nodes.
            Node attributes specified in nodes as a tuple take
            precedence over attributes specified via keyword arguments.

        See Also
        --------
        add_node

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_nodes_from('Hello')
        >>> G.has_node('e')
        True

        Use keywords to update specific node attributes for every node.

        >>> G.add_nodes_from([1, 2], size=10)
        >>> G.add_nodes_from([3, 4], weight=0.4)

        Use (node, attrdict) tuples to update attributes for specific nodes.

        >>> G.add_nodes_from([(1, dict(size=11)), (2, {'color':'blue'})])
        """
        for n in nodes_for_adding:
            # keep all this inside try/except because
            # CPython throws TypeError on n not in self._node,
            # while pre-2.7.5 ironpython throws on self._adj[n]
            try:
                if n not in self._node:
                    self._adj[n] = {}
                    self._node[n] = attr.copy()
                else:
                    self._node[n].update(attr)
            except TypeError:
                nn, ndict = n
                if nn not in self._node:
                    self._adj[nn] = {}
                    self._node[nn] = attr.copy()
                    self._node[nn].update(ndict)
                else:
                    self._node[nn].update(attr)
                    self._node[nn].update(ndict)

    def number_of_nodes(self, begin=None, end=None):
        """Return the number of nodes in the interval graph between the given interval.

        Parameters
        ----------
        begin: integer, optional  (default= beginning of the entire interval graph)
            Inclusive beginning time of the node appearing in the interval graph.
        end: integer, optional  (default= end of the entire interval graph + 1)
            Non-inclusive ending time of the node appearing in the interval graph.
            Must be bigger than begin.
            Note that the default value is shifted up by 1 to make it an inclusive end.

        Returns
        -------
        nnodes : int
            The number of nodes in the interval graph.

        See Also
        --------
        __len__

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 0, 5), (3, 4, 8, 11)])
        >>> len(G)
        4
        >>> G.number_of_nodes()
        4
        >>> G.number_of_nodes(begin=6)
        2
        >>> G.number_of_nodes(begin=5, end=8) # end in non-inclusive
        2
        >>> G.number_of_nodes(end=8)
        4
        """

        if begin is None and end is None:
            return len(self._node)

        if begin is None:
            begin = self.tree.begin()

        if end is None:
            end = self.tree.end() + 1

        iedges = self.tree[begin:end]

        inodes = set()

        for iv in iedges:
            inodes.add(iv.data[0])
            inodes.add(iv.data[1])

        return len(inodes)

    def has_node(self, n, begin=None, end=None):
        """Return True if the interval graph contains the node n, during the given interval.

        Identical to `n in G` when 'begin' and 'end' are not defined.

        Parameters
        ----------
        n : node
        begin: integer, optional  (default= beginning of the entire interval graph)
            Inclusive beginning time of the node appearing in the interval graph.
        end: integer, optional  (default= end of the entire interval graph + 1)
            Non-inclusive ending time of the node appearing in the interval graph.
            Must be bigger than begin.
            Note that the default value is shifted up by 1 to make it an inclusive end.

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_ndoe(1)
        >>> G.has_node(1)
        True

        It is more readable and simpler to use

        >>> 0 in G
        True

        With interval query:

        >>> G.add_edge(3, 4, 2, 5)
        >>> G.has_node(3)
        True
        >>> G.has_node(3, begin=2)
        True
        >>> G.has_node(3, end=2) # end is non-inclusive
        False
        """
        try:
            exists_node = n in self._node
        except TypeError:
            exists_node = False

        if (begin is None and end is None) or not exists_node:
            return exists_node

        if begin is None:
            begin = self.tree.begin()

        if end is None:
            end = self.tree.end() + 1

        iedges = self._adj[n].keys()

        for iv in iedges:
            if iv.overlaps(begin=begin, end=end):
                return True

        return False

    def nodes(self, begin=None, end=None, data=False, default=None):
        """A NodeDataView of the IntervalGraph nodes.

        A node is considered to be present during an interval if it has
        an edge whose interval overlaps it.

        Parameters
        ----------
        begin: integer, optional  (default= beginning of the entire interval graph)
            Inclusive beginning time of the node appearing in the interval graph.
        end: integer, optional  (default= end of the entire interval graph + 1)
            Non-inclusive ending time of the node appearing in the interval graph.
            Must be bigger than begin.
            Note that the default value is shifted up by 1 to make it an inclusive end.
        data : string or bool, optional (default=False)
            The node attribute returned in 2-tuple (n, dict[data]).
            If False, return just the nodes n.
        default : value, optional (default=None)
            Value used for nodes that don't have the requested attribute.
            Only relevant if data is not True or False.

        Returns
        -------
        NodeDataView
            A NodeDataView iterates over `(n, data)` and has no set operations.

            When called, if data is False, an iterator over nodes.
            Otherwise an iterator of 2-tuples (node, attribute value)
            where data is True.

        Examples
        --------
        To get all nodes in the graph:

        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)])
        >>> list(G.nodes())
        [1, 2, 4, 6]

        To get the node data along with the nodes:

        >>> G.add_nodes_from([(1, {'time': '1pm'}), (2, {'time': '2pm'}), (4, {'time': '4pm'}), (6, {'day': 'Friday'})])
        >>> list(G.nodes(data=True))
        [(1, {'time': '1pm'}), (2, {'time': '2pm'}), (4, {'time': '4pm'}), (6, {'day': 'Friday'})]

        >>> list(G.nodes(data="time"))
        [(1, '1pm'), (2, '2pm'), (4, '4pm'), (6, None)]
        >>> list(G.nodes(data="time", default="5pm"))
        [(1, '1pm'), (2, '2pm'), (4, '4pm'), (6, '5pm')]

        To get nodes which appear in a specific interval. Nodes
        without an edge are not considered present, and the order of
        the result is not guaranteed.

        >>> sorted(G.nodes(begin=11, data="time", default="5pm"))
        [(2, '2pm'), (4, '4pm'), (6, '5pm')]
        >>> sorted(G.nodes(begin=4, end=12)) # non-inclusive end
        [1, 2, 4]
        """
        if begin is None and end is None:
            return NodeDataView(self._node, data=data, default=default)

        lower = self.tree.begin() if begin is None else begin
        upper = self.tree.end() + 1 if end is None else end

        # collect both endpoints of every edge overlapping the interval
        present = set()
        for iedge in self.tree[lower:upper]:
            present.update((iedge.data[0], iedge.data[1]))

        visible = dict((node, self._node[node]) for node in present)
        return NodeDataView(visible, data=data, default=default)

    def remove_node(self, n, begin=None, end=None):
        """Remove the presence of a node n within the given interval.

        Removes the presence node n and all adjacent edges within the given interval.

        If interval is specified, all the edges of n will be removed within that interval.

        Quiet if n is not in the interval graph.

        Parameters
        ----------
        n : node
           A node in the graph
        begin: integer, optional  (default= beginning of the entire interval graph)
            Inclusive beginning time of the node appearing in the interval graph.
        end: integer, optional  (default= end of the entire interval graph + 1)
            Non-inclusive ending time of the node appearing in the interval graph.
            Must be bigger than begin.
            Note that the default value is shifted up by 1 to make it an inclusive end.

        Examples
        --------
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)])
        >>> G.add_nodes_from([(1, {'time': '1pm'}), (2, {'time': '2pm'}), (4, {'time': '4pm'})])
        >>> G.nodes(begin=4, end=6)
        [1, 2, 4, 6]
        >>> G.remove_node(2, begin=4, end=6)
        >>> G.nodes(begin=4, end=6)
        [4, 6]
        >>> G.nodes(data=True)
        [(1, {'time': '1pm'}), (2, {'time': '2pm'}), (4, {'time': '4pm'}), (6, {})]
        >>> G.remove_node(2)
        >>> G.nodes(data=True)
        [(1, {'time': '1pm'}), (4, {'time': '4pm'}), (6, {})]
        """

        if n not in self._node:
            return

        if begin is None and end is None:
            for iedge in list(self._adj[n].keys()):
                self.__remove_iedge(iedge)
        else:
            if begin is None:
                begin = self.tree.begin()

            if end is None:
                end = self.tree.end() + 1

            for iedge in self.tree[begin:end]:
                if iedge.data[0] == n or iedge.data[1] == n:
                    self.__remove_iedge(iedge)

        # delete the node and its attributes if no edge left
        if len(self._adj[n]) == 0:
            self._adj.pop(n, None)
            self._node.pop(n, None)

    def add_edge(self, u, v, begin, end, **attr):
        """Add an edge between u and v, during interval [begin, end).

        The nodes u and v will be automatically added if they are
        not already in the interval graph.

        Edge attributes can be specified with keywords or by directly
        accessing the edge's attribute dictionary. See examples below.

        Parameters
        ----------
        u, v : nodes
            Nodes can be, for example, strings or numbers.
            Nodes must be hashable (and not None) Python objects.
        begin: orderable type
            Inclusive beginning time of the edge appearing in the interval graph.
        end: orderable type
            Non-inclusive ending time of the edge appearing in the interval graph.
            Must be bigger than begin.
        attr : keyword arguments, optional
            Edge data (or labels or objects) can be assigned using
            keyword arguments.

        Raises
        ------
        NetworkXError
            If the interval tree rejects the interval (edge duration is
            not strictly bigger than zero). In that case the graph is
            left unchanged.

        See Also
        --------
        add_edges_from : add a collection of edges

        Notes
        -----
        Adding an edge that already exists (same two nodes in either
        order, same interval) updates the edge data.

        Both begin and end must be the same type across all edges in the interval graph. Also, to create
        snapshots, both must be integers.

        Many NetworkX algorithms designed for weighted graphs use
        an edge attribute (by default `weight`) to hold a numerical value.

        Examples
        --------
        The following all add the edge e=(1, 2, 3, 10) to graph G:

        >>> G = dnx.IntervalGraph()
        >>> e = (1, 2, 3, 10)
        >>> G.add_edge(1, 2, 3, 10)           # explicit two-node form with interval
        >>> G.add_edge(*e)             # single edge as tuple of two nodes and interval
        >>> G.add_edges_from([(1, 2, 3, 10)])  # add edges from iterable container

        Associate data to edges using keywords:

        >>> G.add_edge(1, 2, 3, 10, weight=3)
        >>> G.add_edge(1, 3, 4, 9, weight=7, capacity=15, length=342.7)
        """

        # The lookup helper takes (u, v, begin, end), in that order.
        iedge = self.__get_iedge_in_tree(u, v, begin, end)

        # if edge exists, just update attr
        if iedge is not None:
            # both endpoints share one attribute dict, so updating one is enough
            self._adj[u][iedge].update(attr)
            return

        iedge = Interval(begin, end, (u, v))

        # Determine which endpoints are new before touching the tree; this
        # also surfaces unhashable nodes (TypeError) before any mutation.
        missing = [node for node in (u, v) if node not in self._node]

        # Insert into the tree first so that a rejected interval does not
        # leave freshly created, edge-less nodes behind.
        try:
            self.tree.add(iedge)
        except ValueError:
            raise NetworkXError(
                "IntervalGraph: edge duration must be strictly bigger than zero {0}."
                .format(iedge))

        # add nodes (setdefault keeps a self-loop from resetting its own entry)
        for node in missing:
            self._adj.setdefault(node, {})
            self._node.setdefault(node, {})

        # add edge: the same attribute dict is referenced from both endpoints
        self._adj[u][iedge] = self._adj[v][iedge] = attr

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add all the edges in ebunch_to_add.

        Parameters
        ----------
        ebunch_to_add : container of edges
            Each edge given in the container will be added to the
            interval graph. The edges must be given as as 4-tuples (u, v, being, end).
            Both begin and end must be orderable and the same type across all edges.
        attr : keyword arguments, optional
            Edge data (or labels or objects) can be assigned using
            keyword arguments.

        See Also
        --------
        add_edge : add a single edge

        Notes
        -----
        Adding the same edge (with the same interval) twice has no effect
        but any edge data will be updated when each duplicate edge is added.

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11)]) # using a list of edge tuples

        Associate data to edges

        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11)], weight=3)
        >>> G.add_edges_from([(3, 4, 2, 19), (1, 4, 1, 3)], label='WN2898')
        """

        for e in ebunch_to_add:
            if len(e) != 4:
                raise NetworkXError(
                    "Edge tuple {0} must be a 4-tuple.".format(e))

            self.add_edge(e[0], e[1], e[2], e[3], **attr)

    def has_edge(self, u, v, begin=None, end=None, overlapping=True):
        """Return True if there exists an edge between u and v
        in the interval graph, during the given interval.

        Parameters
        ----------
        u, v : nodes
            Nodes can be, for example, strings or numbers.
            Nodes must be hashable (and not None) Python objects.
        begin : integer, optional (default= beginning of the entire interval graph)
            Inclusive beginning time of the node appearing in the interval graph.
        end : integer, optional (default= end of the entire interval graph + 1)
            Non-inclusive ending time of the node appearing in the interval graph.
            Must be bigger than begin.
            Note that the default value is shifted up by 1 to make it an inclusive end.
        overlapping : bool, optional (default= True)
            if True, it returns True if there exists an edge between u and v with
            overlapping interval with `begin` and `end`.
            if False, it returns true only if there exists an edge between u and v
            with the exact interval.
            Note: if False, both `begin` and `end` must be defined, otherwise
            an exception is raised.

        Raises
        ------
        NetworkXError
            If `begin` and `end` are not defined and `overlapping= False`

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11)])
        >>> G.has_edge(1, 2)
        True

        With specific overlapping interval:
        
        >>> G.has_edge(1, 2, begin=2)
        True
        >>> G.has_edge(2, 4, begin=12)
        False

        Exact interval match:

        >>> G.has_edge(2, 4, begin=1, end=11)
        True
        >>> G.has_edge(2, 4, begin=2, end=11)
        False
        """

        if begin is None and end is None:
            for iv in self._adj[u].keys():
                if iv.data[0] == v or iv.data[1] == v:
                    return True
            return False

        if not overlapping:
            if begin is None or end is None:
                raise NetworkXError(
                    "For exact interval match (overlapping=False), both begin and end must be defined."
                )

            return self.__get_iedge_in_tree(u, v, begin, end) is not None

        if begin is None:
            begin = self.tree.begin()

        if end is None:
            end = self.tree.end() + 1

        for iv in self._adj[u].keys():
            if (iv.data[0] == v or iv.data[1] == v) and iv.overlaps(
                    begin=begin, end=end):
                return True
        return False

    def edges(self,
              u=None,
              v=None,
              begin=None,
              end=None,
              data=False,
              default=None):
        """A list of Interval objects of the IntervalGraph edges.

        All edges which are present within the given interval.

        All parameters are optional. `u` and `v` can be thought of as constraints.
        If no node is defined, all edges within the interval are returned.
        If one node is defined, all edges which have that node as one end,
        will be returned, and finally if both nodes are defined then all
        edges between the two nodes are returned.

        Parameters
        ----------
        u, v : nodes, optional (default=None)
            Nodes can be, for example, strings or numbers.
            Nodes must be hashable (and not None) Python objects.
            If the node does not exist in the graph, a key error is raised.
        begin: integer, optional  (default= beginning of the entire interval graph)
            Inclusive beginning time of the edge appearing in the interval graph.
        end: integer, optional  (default= end of the entire interval graph + 1)
            Non-inclusive ending time of the edge appearing in the interval graph.
            Must be bigger than begin.
            Note that the default value is shifted up by 1 to make it an inclusive end.
        data : string or bool, optional (default=False)
            If True, return 2-tuple (Interval object, dict of attributes).
            If False, return just the Interval objects.
            If string (name of the attribute), return 2-tuple (Interval object, attribute value).
        default : value, optional (default=None)
            Default Value to be used for edges that don't have the requested attribute.
            Only relevant if `data` is a string (name of an attribute).

        Returns
        -------
        List of Interval objects
            An interval object has the following format: (begin, end, (u, v))

            When called, if `data` is False, a list of interval objects.
            If `data` is True, a list of 2-tuples: (Interval, dict of attribute(s) with values),
            If `data` is a string, a list of 2-tuples (Interval, attribute value).

        Examples
        --------
        To get a list of all edges:

        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)])
        >>> G.edges()
        [Interval(8, 15, (2, 4)), Interval(3, 10, (1, 2)), Interval(1, 11, (2, 4)), Interval(12, 19, (6, 4))]

        To get edges which appear in a specific interval:

        >>> G.edges(begin=10)
        [Interval(12, 19, (6, 4)), Interval(1, 11, (2, 4)), Interval(8, 15, (2, 4))]
        >>> G.edges(end=5)
        [Interval(3, 10, (1, 2)), Interval(1, 11, (2, 4))]
        >>> G.edges(begin=2, end=4)
        [Interval(3, 10, (1, 2)), Interval(1, 11, (2, 4))]

        To get edges with either of the two nodes being defined:

        >>> G.edges(u=2)
        [Interval(3, 10, (1, 2)), Interval(1, 11, (2, 4)), Interval(8, 15, (2, 4))]
        >>> G.edges(u=2, begin=11)
        [Interval(1, 11, (2, 4)), Interval(8, 15, (2, 4))]
        >>> G.edges(u=2, v=4, end=8)
        [Interval(1, 11, (2, 4))]
        >>> G.edges(u=1, v=6)
        []

        To get a list of edges with data:

        >>> G = dnx.IntervalGraph()
        >>> G.add_edge(1, 3, 1, 4, weight=8, height=18)
        >>> G.add_edge(1, 2, 3, 10, weight=10)
        >>> G.add_edge(2, 6, 2, 10)
        >>> G.edges(data="weight")
        [(Interval(2, 8, (2, 3)), None), (Interval(3, 10, (1, 2)), 10), (Interval(1, 4, (1, 3)), 8)]
        >>> G.edges(data="weight", default=5)
        [(Interval(2, 8, (2, 3)), 5), (Interval(3, 10, (1, 2)), 10), (Interval(1, 4, (1, 3)), 8)]
        >>> G.edges(data=True)
        [(Interval(2, 8, (2, 3)), {}), (Interval(3, 10, (1, 2)), {'weight': 10}), (Interval(1, 4, (1, 3)), {'height': 18, 'weight': 8})]
        >>> G.edges(u=1, begin=5, end=9, data="weight")
        [(Interval(3, 10, (1, 2)), 10)]
        """

        # If non of the nodes are defined the interval tree is queried for the list of edges,
        # otherwise the edges are returned based on the nodes in the self._adj.o
        if u is None and v is None:
            if begin is None and end is None:
                iedges = self.tree.all_intervals
            # interval filtering
            else:
                if begin is None:
                    begin = self.tree.begin()
                if end is None:
                    end = self.tree.end() + 1

                iedges = self.tree[begin:end]

        else:
            # Node filtering
            if u is not None and v is not None:
                iedges = [
                    iv for iv in self._adj[u].keys()
                    if iv.data[0] == v or iv.data[1] == v
                ]
            elif u is not None:
                iedges = self._adj[u].keys()
            else:
                iedges = self._adj[v].keys()

            # Interval filtering
            if begin is not None and end is not None:
                iedges = [
                    iv for iv in iedges if iv.end >= begin and iv.begin < end
                ]
            elif begin is not None:
                iedges = [iv for iv in iedges if iv.end >= begin]
            elif end is not None:
                iedges = [iv for iv in iedges if iv.begin < end]

        # Appending attribute data if needed
        if data is False:
            return iedges if isinstance(iedges, list) else list(iedges)

        if data is True:
            return [(iv, self._adj[iv.data[0]][iv]) for iv in iedges]

        return [(iv, self._adj[iv.data[0]][iv][data])
                if data in self._adj[iv.data[0]][iv].keys() else (iv, default)
                for iv in iedges]

    def remove_edge(self, u, v, begin=None, end=None, overlapping=True):
        """Remove the edge between u and v in the interval graph,
        during the given interval.

        Quiet if the specified edge is not present.

        Parameters
        ----------
        u, v : nodes
            Nodes can be, for example, strings or numbers.
            Nodes must be hashable (and not None) Python objects.
        begin : integer, optional (default= beginning of the entire interval graph)
            Inclusive beginning time of the edge appearing in the interval graph.
        end : integer, optional (default= end of the entire interval graph + 1)
            Non-inclusive ending time of the edge appearing in the interval graph.
            Must be bigger than begin.
            Note that the default value is shifted up by 1 to make it an inclusive end.
        overlapping : bool, optional (default= True)
            if True, remove the edge between u and v with overlapping interval
            with `begin` and `end`.
            if False, remove the edge between u and v with the exact interval.
            Note: if False, both `begin` and `end` must be defined, otherwise
            an exception is raised.

        Raises
        ------
        NetworkXError
            If `begin` and `end` are not defined and `overlapping= False`

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 5, 9), (1, 2, 8, 15)])
        >>> G.remove_edge(1, 2)
        >>> G.has_edge(1, 2)
        False

        With specific overlapping interval

        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 5, 9), (1, 2, 8, 15)])
        >>> G.remove_edge(1, 2, begin=2, end=4)
        >>> G.has_edge(1, 2, begin=2, end=4)
        False
        >>> G.has_edge(1, 2)
        True

        Exact interval match

        >>> G.remove_edge(2, 4, begin=1, end=11, overlapping=False)
        >>> G.has_edge(2, 4, begin=1, end=11)
        False
        """
        # remove edge between u and v with the exact given interval
        if not overlapping:
            if begin is None or end is None:
                raise NetworkXError(
                    "For exact interval match (overlapping=False), both begin and end must be defined."
                )

            iedge = self.__get_iedge_in_tree(u, v, begin, end)
            if iedge is None:
                return
            self.__remove_iedge(iedge)
            return

        iedges_to_remove = []

        # remove every edge between u and v
        if begin is None and end is None:
            for iv in self._adj[u].keys():
                if iv.data[0] == v or iv.data[1] == v:
                    iedges_to_remove.append(iv)

        # remove edge between u and v with overlapping interval with the given interval
        if begin is None:
            begin = self.tree.begin()

        if end is None:
            end = self.tree.end() + 1

        for iv in self._adj[u].keys():
            if (iv.data[0] == v or iv.data[1] == v) and iv.overlaps(
                    begin=begin, end=end):
                iedges_to_remove.append(iv)

        # removing found iedges
        for iv in iedges_to_remove:
            self.__remove_iedge(iv)

    def __remove_iedge(self, iedge):
        """Remove the interval edge from the interval graph.

        Quiet if the specified edge is not present.

        Parameters
        ----------
        iedge : Interval object
            Interval edge to be removed.

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_edge(1, 2, 3, 10)
        >>> iedge = Interval(3, 10, (1, 2))   # Interval(begin, end, (u, v))
        >>> G.__remove_iedge(iedge)
        """
        self.tree.discard(iedge)
        self._adj[iedge.data[0]].pop(iedge, None)
        self._adj[iedge.data[1]].pop(iedge, None)

    def __get_iedge_in_tree(self, u, v, begin, end):
        """Return interval edge if found in the interval graph with the exact interval,
        otherwise return None.

        Edges are stored with their endpoints in insertion order, so both
        orientations, (u, v) first and then (v, u), are looked up.

        Parameters
        ----------
        u, v : nodes
            Nodes can be, for example, strings or numbers.
            Nodes must be hashable (and not None) Python objects.
        begin : integer
            Inclusive beginning time of the edge appearing in the interval graph.
        end : integer
            Non-inclusive ending time of the edge appearing in the interval graph.
            Must be bigger than begin.

        Returns
        -------
        Interval or None
            An Interval equal to the stored edge, or None if the tree
            holds no edge between the two nodes with exactly this interval.

        Examples
        --------
        For a graph holding only the edge (1, 2, 3, 10), the arguments
        (2, 1, 3, 10) yield Interval(3, 10, (1, 2)), while (2, 1, 4, 10)
        yield None.
        """
        for endpoints in ((u, v), (v, u)):
            candidate = Interval(begin, end, endpoints)
            if candidate in self.tree:
                return candidate

        return None

    def to_subgraph(self,
                    begin,
                    end,
                    multigraph=False,
                    edge_data=False,
                    edge_interval_data=False,
                    node_data=False):
        """Return a networkx Graph or MultiGraph which includes all the nodes and
        edges which have overlapping intervals with the given interval.

        Parameters
        ----------
        begin: integer
            Inclusive beginning time of the edge appearing in the interval graph.
        end: integer
            Non-inclusive ending time of the edge appearing in the interval graph.
            Must be bigger than begin.
        multigraph: bool, optional (default= False)
            If True, a networkx MultiGraph will be returned. If False, networkx Graph.
        edge_data: bool, optional (default= False)
            If True, edges will keep their attributes.
        edge_interval_data: bool, optional (default= False)
            If True, each edge's attribute will also include its begin and end interval data.
            If `edge_data= True` and there already exist edge attributes with names begin and end,
            they will be overwritten.
        node_data : bool, optional (default= False)
            if True, each node's attributes will be included.

        Raises
        ------
        NetworkXError
            If `end` is not strictly bigger than `begin`.

        See Also
        --------
        to_snapshots : divide the interval graph to snapshots

        Notes
        -----
        If multigraph= False, and edge_data=True or edge_interval_data=True,
        in case there are multiple edges, only one will show with one of the edge's attributes.

        Note: nodes with no edges will not appear in any subgraph.

        Attribute dicts are copied, so changing the returned graph does
        not alter the interval graph.

        Examples
        --------
        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)])
        >>> H = G.to_subgraph(4, 12)
        >>> type(H)
        <class 'networkx.classes.graph.Graph'>
        >>> list(H.edges(data=True))
        [(1, 2, {}), (2, 4, {})]

        >>> H = G.to_subgraph(4, 12, edge_interval_data=True)
        >>> type(H)
        <class 'networkx.classes.graph.Graph'>
        >>> list(H.edges(data=True))
        [(1, 2, {'end': 10, 'begin': 3}), (2, 4, {'end': 15, 'begin': 8})]

        >>> M = G.to_subgraph(4, 12, multigraph=True, edge_interval_data=True)
        >>> type(M)
        <class 'networkx.classes.multigraph.MultiGraph'>
        >>> list(M.edges(data=True))
        [(1, 2, {'end': 10, 'begin': 3}), (2, 4, {'end': 11, 'begin': 1}), (2, 4, {'end': 15, 'begin': 8})]
        """

        if end <= begin:
            raise NetworkXError(
                "IntervalGraph: subgraph duration must be strictly bigger than zero: "
                "begin: {}, end: {}.".format(begin, end))

        overlapping = self.tree[begin:end]
        G = MultiGraph() if multigraph else Graph()

        if not (edge_data or edge_interval_data):
            # bare edges, no attribute dict at all
            G.add_edges_from(
                (iedge.data[0], iedge.data[1]) for iedge in overlapping)
        else:
            ebunch = []
            for iedge in overlapping:
                u, v = iedge.data[0], iedge.data[1]
                # start from a copy of the stored attributes, or from nothing
                attrs = self._adj[u][iedge].copy() if edge_data else {}
                if edge_interval_data:
                    # interval bounds override same-named edge attributes
                    attrs['begin'] = iedge.begin
                    attrs['end'] = iedge.end
                ebunch.append((u, v, attrs))
            G.add_edges_from(ebunch)

        # include node attributes
        if node_data:
            G.add_nodes_from((n, self._node[n].copy()) for n in G.nodes)

        return G

    def to_snapshots(self,
                     number_of_snapshots,
                     multigraph=False,
                     edge_data=False,
                     edge_interval_data=False,
                     node_data=False,
                     return_length=False):
        """Return a list of networkx Graph or MultiGraph objects as snapshots
        of the interval graph in consecutive order.

        The interval returned by ``self.interval()`` is cut into
        `number_of_snapshots` windows of equal length and ``self.to_subgraph``
        is called once per window.

        Parameters
        ----------
        number_of_snapshots : integer
            Number of snapshots to divide the interval graph into.
            Must be an ``int`` and bigger than 1.
        multigraph : bool, optional (default= False)
            If True, a networkx MultiGraph will be returned. If False, networkx Graph.
        edge_data: bool, optional (default= False)
            If True, edges will keep their attributes.
        edge_interval_data : bool, optional (default= False)
            If True, each edge's attribute will also include its begin and end interval data.
            If `edge_data= True` and there already exist edge attributes with names begin and end,
            they will be overwritten.
        node_data : bool, optional (default= False)
            if True, each node's attributes will be included.
        return_length : bool, optional (default= False)
            If true, the length of snapshots will be returned as the second argument.

        Returns
        -------
        snapshots : list
            One graph per snapshot, in consecutive order.
        snapshot_len : number
            Length of a single snapshot. Only returned (as the second element
            of a tuple) when `return_length` is True.

        Raises
        ------
        NetworkXError
            If `number_of_snapshots` is not an ``int`` or is smaller than 2.

        See Also
        --------
        to_subgraph : subgraph based on an interval

        Notes
        -----
        In order to create snapshots, begin and end interval objects of the interval graph must be numbers.

        If multigraph= False, and edge_data=True or edge_interval_data=True,
        in case there are multiple edges, only one will show with one of the edge's attributes.

        Examples
        --------
        Snapshots of NetworkX Graph

        >>> G = dnx.IntervalGraph()
        >>> G.add_edges_from([(1, 2, 3, 10), (2, 4, 1, 11), (6, 4, 12, 19), (2, 4, 8, 15)])
        >>> S, l = G.to_snapshots(2, edge_interval_data=True, return_length=True)
        >>> S
        [<networkx.classes.graph.Graph object at 0x100000>, <networkx.classes.graph.Graph object at 0x150d00>]
        >>> l
        9.0
        >>> for g in S:
        ...     print(list(g.edges(data=True)))
        [(1, 2, {'begin': 3, 'end': 10}), (2, 4, {'begin': 8, 'end': 15})]
        [(2, 4, {'begin': 8, 'end': 15}), (4, 6, {'begin': 12, 'end': 19})]

        Snapshots of NetworkX MultiGraph

        >>> S, l = G.to_snapshots(3, multigraph=True, edge_interval_data=True, return_length=True)
        >>> S
        [<networkx.classes.multigraph.MultiGraph object at 0x1060d40b8>, <networkx.classes.multigraph.MultiGraph object at 0x151020c9e8>, <networkx.classes.multigraph.MultiGraph object at 0x151021d390>]
        >>> l
        6.0
        >>> for g in S:
        ...     print(list(g.edges(data=True)))
        [(1, 2, {'end': 10, 'begin': 3}), (2, 4, {'end': 11, 'begin': 1})]
        [(1, 2, {'end': 10, 'begin': 3}), (2, 4, {'end': 11, 'begin': 1}), (2, 4, {'end': 15, 'begin': 8}), (4, 6, {'end': 19, 'begin': 12})]
        [(2, 4, {'end': 15, 'begin': 8}), (4, 6, {'end': 19, 'begin': 12})]
        """

        # The type test must come first: comparing e.g. a str or None with
        # `< 2` raises TypeError on Python 3 before the intended error is built.
        # `type(...) is not int` (rather than isinstance) also rejects bool.
        if type(number_of_snapshots) is not int or number_of_snapshots < 2:
            raise NetworkXError(
                "IntervalGraph: number of snapshots must be an integer and 2 or bigger. "
                "{0} was passed.".format(number_of_snapshots))

        begin, end = self.interval()
        snapshot_len = (end - begin) / number_of_snapshots

        snapshots = []
        last_index = number_of_snapshots - 1
        for i in range(number_of_snapshots):
            # since to_subgraph is end non-inclusive, shift the end up by 1 to
            # include end in the last snapshot.
            end_inclusive_addition = 1 if i == last_index else 0

            snapshots.append(
                self.to_subgraph(begin + snapshot_len * i,
                                 begin + snapshot_len * (i + 1) +
                                 end_inclusive_addition,
                                 multigraph=multigraph,
                                 edge_data=edge_data,
                                 edge_interval_data=edge_interval_data,
                                 node_data=node_data))

        if return_length:
            return snapshots, snapshot_len

        return snapshots

    @staticmethod
    def load_from_txt(path, delimiter=" ", nodetype=None, comments="#"):
        """Read interval graph in from path.
           Every line in the file must be an edge in the following format: "node node begin end".
           Both interval times must be integers. Nodes can be any hashable objects.

        Parameters
        ----------
        path : string
           Filename to read. It is passed directly to ``open``.

        nodetype : Python type, optional
           Convert nodes to this type.

        comments : string, optional
           Marker for comment lines. Everything from the first occurrence of
           the marker to the end of the line is ignored.

        delimiter : string, optional
           Separator for the four fields of a line. The default is a single space.

        Returns
        -------
        G: IntervalGraph
            The graph corresponding to the lines in edge list.

        Raises
        ------
        TypeError
            If a node cannot be converted with `nodetype`, or a begin/end time
            cannot be converted to ``int``.
        ValueError
            If a non-empty line does not split into exactly four fields.

        Examples
        --------
        >>> G=dnx.IntervalGraph.load_from_txt("my_dygraph.txt")

        The optional nodetype is a function to convert node strings to nodetype.

        For example

        >>> G=dnx.IntervalGraph.load_from_txt("my_dygraph.txt", nodetype=int)

        will attempt to convert all nodes to integer type.

        Since nodes must be hashable, the function nodetype must return hashable
        types (e.g. int, float, str, frozenset - or tuples of those, etc.)
        """

        ig = IntervalGraph()

        with open(path, 'r') as edge_file:
            for line in edge_file:
                # Drop everything from the comment marker onwards.
                p = line.find(comments)
                if p >= 0:
                    line = line[:p]

                # Strip the trailing newline/whitespace *before* testing for
                # emptiness, so blank lines are skipped instead of failing
                # in the four-way unpacking below.
                line = line.rstrip()
                if not line:
                    continue

                u, v, begin, end = line.split(delimiter)

                if nodetype is not None:
                    try:
                        u = nodetype(u)
                        v = nodetype(v)
                    except Exception:
                        raise TypeError(
                            "Failed to convert node to type {0}".format(
                                nodetype))

                # Interval times are always integers, independent of nodetype.
                try:
                    begin = int(begin)
                    end = int(end)
                except (TypeError, ValueError):
                    raise TypeError("Failed to convert time to type int")

                ig.add_edge(u, v, begin, end)

        return ig

コード例 #15

ファイルを表示

class TaskSet(object):
    """
    Collection of tasks kept both in a priority queue and in an interval tree.

    The queue holds the task objects; the interval tree holds one interval per
    task (from ``task.release`` to ``task.deadline``, tagged with
    ``task.taskID``) and is used to detect overlapping, i.e. conflicting, tasks.
    """
    def __init__(self):
        # The queue keeps tasks in r1 < r2 < r3 order.
        self._tasksQueue = TaskUnitPriorityQueue()
        self._intervalTree = IntervalTree()

    @property
    def tasks(self):
        """Items currently stored in the priority queue."""
        return self._tasksQueue.items()

    def add(self, task):
        """
        Store ``task`` in the interval tree and in the queue.

        Raises DuplicateTaskException when the queue already contains
        ``task.taskID``.
        """
        if self._tasksQueue.contains(task.taskID):
            raise DuplicateTaskException
        self._addTaskToTree(task)
        self._tasksQueue.push(task)

    def _addTaskToTree(self, task):
        """Insert the interval of ``task`` into the interval tree."""
        self._intervalTree.addi(
            begin=task.release,
            end=task.deadline,
            data=task.taskID,
        )

    def remove(self, task):
        """Drop ``task`` from the interval tree and then from the queue."""
        self._intervalTree.discardi(task.release, task.deadline, task.taskID)
        self._tasksQueue.remove(task.taskID)

    def _findLatestInterval(self, intervals):
        """
        Return the interval with the greatest ``begin``.

        On ties the interval that comes first in ``intervals`` wins.
        """
        latest = intervals[0]
        for candidate in intervals:
            latest = candidate if candidate.begin > latest.begin else latest
        return latest

    def _orIntervals(self, intervalListA, intervalListB):
        """Return the set union of both interval lists as a list."""
        left = set(intervalListA)
        right = set(intervalListB)
        return list(left | right)

    def _conflictPath(self, interval, intervalTree):
        """
        Collect the chain of intervals that transitively overlap ``interval``.

        @param interval The interval to find conflicts with.
        @param intervalTree The tree containing all intervals. It is modified
               while the chain is followed, so callers hand in a copy.

        Examples:
          if A and B conflict and B and C conflict and A is the interval
          we're looking for conflicts with, the result is A, B, C.
          if D and E conflict and F and G conflict, and we look for all
          conflicts with D, only D and E are returned because F and G do
          not overlap with either of them.
        """
        overlapping = list(intervalTree.search(interval))
        # A single hit that is the queried interval itself means no conflict.
        only_itself = len(overlapping) == 1 and overlapping[0] == interval
        if only_itself:
            return []
        # Continue the search from the latest overlapping interval.
        latestInterval = self._findLatestInterval(overlapping)
        # Everything overlapping the current interval has been seen already.
        intervalTree.remove_overlap(interval)
        # Re-insert the latest one so the recursive search can find it.
        intervalTree.add(latestInterval)
        further = self._conflictPath(latestInterval, intervalTree)
        return self._orIntervals(overlapping, further)

    def _intervalConflictAlreadyDetected(self, interval, conflicts):
        """Tell whether ``interval`` is part of any conflict group in ``conflicts``."""
        return any(ival == interval
                   for conflict in conflicts
                   for ival in conflict)

    def findConflicts(self):
        """
        Partition the stored intervals into conflicting and non-conflicting ones.

        Returns a pair of ConflictSet objects: the first built from the
        conflict groups, the second from the intervals without conflicts.
        """
        tree = self._intervalTree
        begin = tree.begin()
        end = tree.end()
        conflicts = []
        conflictObjs = []
        nonConflictsObjs = []
        for interval in sorted(tree[begin:end]):
            # Intervals already reported in an earlier group are skipped.
            if self._intervalConflictAlreadyDetected(interval, conflicts):
                continue
            # _conflictPath mutates the tree it is given, hence the copy.
            group = self._conflictPath(interval, tree.copy())
            if group:
                conflicts.append(group)
                conflictObjs.append(Conflict(group))
            else:
                nonConflictsObjs.append(Conflict(interval))
        return ConflictSet(conflictObjs), ConflictSet(nonConflictsObjs)

    def __iter__(self):
        return self._tasksQueue

コード例 #16

ファイルを表示

ファイル: precise_extension.py プロジェクト: JulienPPichon/Precise_extension

def precise_extension(dict_transcript, dict_exon_signal, gene_col,
                      coverage_stringtie):
    """Rebuild the per-chromosome transcript trees, extending transcripts
    where a signal supports it.

    For every chromosome in ``dict_transcript`` a new IntervalTree is filled
    with one interval per transcript. A transcript is only considered for
    extension when at least one other transcript lies in the window of 5000
    positions past its end (strand "+") or before its begin (strand "-").
    In that case the exons of neighbouring transcripts that belong to another
    gene on the same strand, together with the signals found in
    ``dict_exon_signal``, decide how far it is extended. When no suitable
    signal is found, or when the window holds no other transcript, the
    transcript is stored unchanged. Transcripts whose strand field is
    neither "+" nor "-" are not stored at all.

    Parameters
    ----------
    dict_transcript : dict
        Maps a chromosome to an interval collection that supports slicing
        (``tree[a:b]``). The ``data`` of each interval is a sequence of rows;
        field 6 of a row is compared with "+"/"-", fields 3 and 4 are read
        with ``int()`` and field 1 is overwritten on extension.
        NOTE(review): this looks like GTF columns (source, start, end,
        strand) -- confirm against the caller.
    dict_exon_signal : dict
        Maps a chromosome to a sliceable interval collection of signals.
        ``signal.data[0]`` is compared with "+"/"-" and ``signal.data[1]`` is
        multiplied by the signal length and compared with the threshold.
    gene_col : int
        Index of the row field used to decide whether two transcripts belong
        to the same gene.
    coverage_stringtie : number
        Multiplied by 200 to obtain the threshold a signal must exceed.

    Returns
    -------
    dict
        Chromosome -> IntervalTree. The data of an interval is either the
        original ``transcript.data`` or a deep copy in which the boundary
        field holds the new position as a string, field 1 is
        "BestScriptEver" and an "extension ..." string has been appended.

    Side effects
    ------------
    Writes (and truncates) ``errors_file.txt`` in the current working
    directory: one line per transcript that already overlapped a neighbour
    before any extension.
    """
    precisely_extended_dict = {}
    overlapped_transcripts = []
    coverage = coverage_stringtie * 200  # Average length of an exon = 200pb.
    # Flag: whether the introns of a gene can be the exons of another one.
    # It is hard-coded to False and never reassigned in this function.
    intron_exon = False
    for chromosome in dict_transcript:
        # Create a new dictionary with the same model as dict_transcript.
        # NOTE(review): the tree is stored under str(chromosome) but read back
        # below under chromosome itself; this only works if the keys are str.
        precisely_extended_dict[str(chromosome)] = IntervalTree()
        for transcript in sorted(dict_transcript[chromosome]):
            # 0 means "no neighbouring transcript of another gene seen yet".
            overlap_start = 0
            # Introduce the boolean extension with false as default for each transcript.
            extension = False
            # Case where the transcript is from the positive strand.
            if transcript.data[0][6] == "+":
                # Check if there is others transcripts in the area to extend.
                if len(dict_transcript[chromosome][transcript.end +
                                                   1:transcript.end +
                                                   5001]) != 0:
                    exons_it = IntervalTree()
                    introns_it = IntervalTree()
                    max_extension = 0
                    for transcript_in_iv in sorted(
                            dict_transcript[chromosome][transcript.end +
                                                        1:transcript.end +
                                                        5001]):
                        # If others transcripts are from the same strand but not the same gene, store in an IV the exons and the overlapping start.
                        if transcript_in_iv.data[0][
                                gene_col] != transcript.data[0][
                                    gene_col] and transcript_in_iv.data[0][
                                        6] == "+":
                            if overlap_start == 0:
                                if transcript_in_iv.begin > transcript.end:
                                    overlap_start = transcript_in_iv.begin
                                # If transcripts are already overlapping before extension, error in the original GTF.
                                else:
                                    overlap_start = transcript.end + 1
                                    overlapped_transcripts.append(transcript)
                            for exon_in_transcript in transcript_in_iv.data:
                                if int(exon_in_transcript[3]) > transcript.end:
                                    exons_it[int(exon_in_transcript[3]) +
                                             1:int(exon_in_transcript[4]
                                                   )] = "exon"
                                else:
                                    continue
                    # Comeback to the case where there is an overlapping issue.
                    if len(exons_it) > 1:
                        # If there is a signal in the area where overlapping start in the stringtie output.
                        if chromosome in dict_exon_signal:
                            # intron_exon is hard-coded to False above, so this
                            # branch is currently never taken.
                            if len(dict_exon_signal[chromosome]
                                   [overlap_start:transcript.end +
                                    5001]) != 0 and intron_exon == True:
                                exons_it.merge_overlaps()
                                # Convert the exon intervaltree in a intron one.
                                for exon_number, exons in enumerate(
                                        sorted(exons_it)):
                                    if exon_number == 0:
                                        previous_end = exons.end
                                    else:
                                        introns_it[previous_end +
                                                   1:exons.begin] = "intron"
                                        previous_end = exons.end
                                # Check if signal overlap introns and assign max extension in consequence.
                                for signal in sorted(
                                        dict_exon_signal[chromosome]
                                    [overlap_start:introns_it.end()],
                                        reverse=True):
                                    if signal.data[0] == "+":
                                        for intron in sorted(introns_it,
                                                             reverse=True):
                                            if signal.end > intron.begin and signal.begin < intron.end and signal.end <= transcript.end + 5001:
                                                if signal.end < intron.end:
                                                    max_extension = signal.end
                                                else:
                                                    max_extension = intron.end
                                                extension = True
                                                break
                                        if max_extension != 0:
                                            new_transcript_end = max_extension
                                            break
                                    else:
                                        continue
                                # Case where no signal overlap introns.
                                if max_extension == 0:
                                    if len(dict_exon_signal[chromosome]
                                           [transcript.end +
                                            1:overlap_start]) != 0:
                                        for signal in sorted(
                                                dict_exon_signal[chromosome]
                                            [transcript.end + 1:overlap_start],
                                                reverse=True):
                                            if signal.data[
                                                    0] == "+" and signal.end <= transcript.end + 5001:
                                                new_transcript_end = signal.end
                                                extension = True
                                                break
                                    else:
                                        extension = False
                            # Case where no signal overlap transcripts.
                            else:
                                if len(
                                        dict_exon_signal[chromosome]
                                    [transcript.end + 1:overlap_start]) != 0:
                                    # Signals are visited in reverse sorted
                                    # order; the first one passing the strand,
                                    # position and coverage tests wins.
                                    for signal in sorted(
                                            dict_exon_signal[chromosome]
                                        [transcript.end + 1:overlap_start],
                                            reverse=True):
                                        if signal.data[
                                                0] == "+" and signal.end <= transcript.end + 5001 and signal.end < overlap_start:
                                            if signal.data[1] * (
                                                    signal.end -
                                                    signal.begin) > coverage:
                                                new_transcript_end = signal.end
                                                extension = True
                                                break
                        else:
                            extension = False
                    elif len(exons_it) == 1:
                        if chromosome in dict_exon_signal:
                            if len(dict_exon_signal[chromosome]
                                   [transcript.end + 1:exons_it.begin()]) != 0:
                                # NOTE(review): the emptiness check above starts
                                # at transcript.end + 1 while this query starts
                                # at transcript.end -- confirm which is intended.
                                for signal in sorted(
                                        dict_exon_signal[chromosome]
                                    [transcript.end:exons_it.begin()],
                                        reverse=True):
                                    if signal.data[
                                            0] == "+" and signal.end <= exons_it.begin(
                                            ) - 1:
                                        if signal.data[1] * (
                                                signal.end -
                                                signal.begin) > coverage:
                                            new_transcript_end = signal.end
                                            extension = True
                                            break
                            else:
                                extension = False
                        else:
                            extension = False

                    else:
                        # If there is a signal present from the stringtie output overlapping from the end of the transcript to an inputted value, save the signal's end.
                        if chromosome in dict_exon_signal:
                            if len(dict_exon_signal[chromosome]
                                   [transcript.end + 1:transcript.end +
                                    5001]) != 0:
                                # NOTE(review): the emptiness check above starts
                                # at transcript.end + 1 while this query starts
                                # at transcript.end -- confirm which is intended.
                                for signal in sorted(
                                        dict_exon_signal[chromosome]
                                    [transcript.end:transcript.end + 5001],
                                        reverse=True):
                                    if signal.data[
                                            0] == "+" and signal.end <= transcript.end + 5001:
                                        if signal.data[1] * (
                                                signal.end -
                                                signal.begin) > coverage:
                                            new_transcript_end = signal.end
                                            extension = True
                                            break
                            else:
                                extension = False
                        else:
                            extension = False
                # When extension is true, end of the transcript is changed with the signal's end and added to the new dict.
                if extension is True:
                    # Work on a deep copy so the input transcript rows are not modified.
                    modified_transcript = copy.deepcopy(transcript)
                    modified_transcript.data[-1][4] = str(new_transcript_end)
                    modified_transcript.data[-1][1] = "BestScriptEver"
                    modified_transcript.data[-1].append(
                        "extension +" +
                        str(new_transcript_end - transcript.end))
                    precisely_extended_dict[chromosome][
                        int(transcript.begin):int(new_transcript_end
                                                  )] = modified_transcript.data
                # Otherwise, unmodified transcript is added.
                else:
                    precisely_extended_dict[chromosome][
                        int(transcript.begin):int(transcript.end
                                                  )] = transcript.data

            # Case where the transcript is from the negative strand.
            if transcript.data[0][6] == "-":
                # Check if there is others transcripts in the area to extend.
                if len(dict_transcript[chromosome]
                       [transcript.begin - 5000:transcript.begin]) != 0:
                    exons_it = IntervalTree()
                    introns_it = IntervalTree()
                    max_extension = 0
                    for transcript_in_iv in sorted(
                            dict_transcript[chromosome][transcript.begin -
                                                        5000:transcript.begin],
                            reverse=True):
                        # If others transcripts are from the same strand but not the same gene, store in an IV the exons and the overlapping start.
                        if transcript_in_iv.data[0][
                                gene_col] != transcript.data[0][
                                    gene_col] and transcript_in_iv.data[0][
                                        6] == "-":
                            if overlap_start == 0:
                                if transcript_in_iv.begin < transcript.begin:
                                    overlap_start = transcript_in_iv.end
                                # If transcripts are already overlapping before extension, error in the original GTF.
                                else:
                                    overlap_start = transcript.begin - 1
                                    overlapped_transcripts.append(transcript)
                            for exon_in_transcript in transcript_in_iv.data:
                                if int(exon_in_transcript[4]
                                       ) < transcript.begin:
                                    exons_it[int(exon_in_transcript[3]) +
                                             1:int(exon_in_transcript[4]
                                                   )] = "exon"
                                else:
                                    continue

                    # Comeback to the case where there is an overlapping issue.
                    if len(exons_it) > 1:
                        # If there is a signal in the area where overlapping start in the stringtie output.
                        if chromosome in dict_exon_signal:
                            # intron_exon is hard-coded to False above, so this
                            # branch is currently never taken.
                            if len(dict_exon_signal[chromosome]
                                   [transcript.begin - 5000:overlap_start +
                                    1]) != 0 and intron_exon == True:
                                exons_it.merge_overlaps()
                                # Convert the exon intervaltree in a intron one.
                                for exon_number, exons in enumerate(
                                        sorted(exons_it)):
                                    if exon_number == 0:
                                        previous_end = exons.end
                                    else:
                                        introns_it[previous_end +
                                                   1:exons.begin] = "intron"
                                        previous_end = exons.end
                                # Check if signal overlap introns and assign max extension in consequence.
                                for signal in sorted(
                                        dict_exon_signal[chromosome]
                                    [introns_it.begin():overlap_start + 1]):
                                    if signal.data[0] == "-":
                                        for intron in sorted(introns_it):
                                            if signal.begin < intron.end and signal.end > intron.begin and signal.begin >= transcript.begin - 5000:
                                                if signal.begin > intron.begin:
                                                    max_extension = signal.begin
                                                else:
                                                    max_extension = intron.begin
                                                extension = True
                                                break
                                        if max_extension != 0:
                                            new_transcript_end = max_extension
                                            break
                                    else:
                                        continue
                                # Case where no signal overlap introns.
                                if max_extension == 0:
                                    if len(
                                            dict_exon_signal[chromosome]
                                        [overlap_start:transcript.begin]) != 0:
                                        for signal in sorted(
                                                dict_exon_signal[chromosome]
                                            [overlap_start:transcript.begin]):
                                            if signal.data[
                                                    0] == "-" and signal.begin >= transcript.begin - 5001:
                                                new_transcript_end = signal.begin
                                                extension = True
                                                break
                                    else:
                                        extension = False
                            # Case where no signal overlap transcripts.
                            else:
                                if len(dict_exon_signal[chromosome]
                                       [overlap_start:transcript.begin]) != 0:
                                    # Signals are visited in sorted order; the
                                    # first one passing the strand, position
                                    # and coverage tests wins.
                                    for signal in sorted(
                                            dict_exon_signal[chromosome]
                                        [overlap_start:transcript.begin]):
                                        if signal.data[
                                                0] == "-" and signal.begin >= transcript.begin - 5001 and signal.begin > overlap_start:
                                            if signal.data[1] * (
                                                    signal.end -
                                                    signal.begin) > coverage:
                                                new_transcript_end = signal.begin
                                                extension = True
                                                break
                        else:
                            extension = False
                    elif len(exons_it) == 1:
                        if chromosome in dict_exon_signal:
                            if len(dict_exon_signal[chromosome]
                                   [exons_it.end():transcript.begin]) != 0:
                                for signal in sorted(
                                        dict_exon_signal[chromosome]
                                    [exons_it.end():transcript.begin]):
                                    if signal.data[
                                            0] == "-" and signal.begin >= exons_it.end(
                                            ) + 1:
                                        if signal.data[1] * (
                                                signal.end -
                                                signal.begin) > coverage:
                                            new_transcript_end = signal.begin
                                            extension = True
                                            break
                            else:
                                extension = False
                        else:
                            extension = False
                    else:
                        # If there is a signal present from the stringtie output overlapping from the end of the transcript to an inputted value, save the signal's end.
                        if chromosome in dict_exon_signal:
                            if len(dict_exon_signal[chromosome]
                                   [transcript.begin -
                                    5000:transcript.begin]) != 0:
                                for signal in sorted(
                                        dict_exon_signal[chromosome]
                                    [transcript.begin -
                                     5000:transcript.begin]):
                                    if signal.data[
                                            0] == "-" and signal.begin >= transcript.begin - 5000:
                                        if signal.data[1] * (
                                                signal.end -
                                                signal.begin) > coverage:
                                            new_transcript_end = signal.begin
                                            extension = True
                                            break
                            else:
                                extension = False
                        else:
                            extension = False
                # When extension is true, end of the transcript is changed with the signal's end and added to the new dict.
                if extension is True:
                    # Work on a deep copy so the input transcript rows are not modified.
                    modified_transcript = copy.deepcopy(transcript)
                    modified_transcript.data[0][3] = str(new_transcript_end)
                    modified_transcript.data[0][1] = "BestScriptEver"
                    modified_transcript.data[0].append("extension " +
                                                       str(new_transcript_end -
                                                           transcript.begin))
                    precisely_extended_dict[chromosome][
                        int(new_transcript_end
                            ):int(transcript.end)] = modified_transcript.data
                # Otherwise, unmodified transcript is added.
                else:
                    precisely_extended_dict[chromosome][
                        int(transcript.begin):int(transcript.end
                                                  )] = transcript.data
    # Report every transcript that already overlapped a neighbour before extension.
    with open("errors_file.txt", "w") as filout:
        for ovlp_transcript in overlapped_transcripts:
            filout.write("{}\n".format(ovlp_transcript.data[0]))
    return precisely_extended_dict

コード例 #17

ファイルを表示

class TemporalNodeCollection(NodeCollection):
    """A collection of temporal nodes.

    In addition to the storage inherited from ``NodeCollection`` the
    collection keeps an ``IntervalTree`` (``self._events``) whose intervals
    are tagged with the uid of the node they belong to.
    """
    def __init__(self, *args, **kwargs) -> None:
        """Initialize the NodeCollection object."""

        # initialize the base class
        super().__init__(*args, **kwargs)

        # interval tree mapping time spans to node uids
        self._events = IntervalTree()

        # class of objects
        self._default_class: Any = TemporalNode

    @singledispatchmethod
    def __getitem__(self, key: Any) -> Any:
        """Look up ``key`` through the base collection.

        Used for every key type that has no temporal overload registered
        below (slice, int, float).
        """
        return super().__getitem__(key)

    @__getitem__.register(slice)  # type: ignore
    @__getitem__.register(int)  # type: ignore
    @__getitem__.register(float)  # type: ignore
    def _(self, key: Union[int, float, slice]) -> Any:
        """Yield the temporal states of all nodes matching a time key.

        The key is converted to a ``(start, end)`` range by
        ``_get_start_end``.  For every stored event interval returned by
        slicing the tree with that range, in sorted order, the referenced
        node is sliced with the interval's own bounds and the resulting
        objects are yielded.
        """
        # pylint: disable=arguments-differ
        start, end, _ = _get_start_end(key)
        for begin, stop, uid in sorted(self._events[start:end]):
            yield from self[uid][begin:stop]

    @property
    def start(self):
        """start of the object (``begin()`` of the event tree)"""
        return self._events.begin()

    @property
    def end(self):
        """end of the object (``end()`` of the event tree)"""
        return self._events.end()

    @property
    def events(self):
        """Temporal events (the underlying interval tree)"""
        return self._events

    @singledispatchmethod
    def add(self, *args, **kwargs: Any) -> None:
        """Add multiple nodes. """
        super().add(*args, **kwargs)

    def _add(self, obj: Any, **kwargs: Any) -> None:
        """Add a node to the set of nodes and index its event interval.

        The interval registered in ``self._events`` is the one returned by
        ``obj.last()`` and is tagged with ``obj.uid``.
        """
        super()._add(obj, **kwargs)
        start, end, _ = obj.last()
        self._events[start:end] = obj.uid

    def _if_exist(self, obj: Any, **kwargs: Any) -> None:
        """Helper function if node already exists.

        Forwards the remaining keyword arguments as a new event to the
        already stored element and indexes the interval from ``obj.last()``
        under that element's uid.
        """
        # Drop 'count' so it is not forwarded to ``event``; the value
        # itself is not used here.
        kwargs.pop('count', None)
        element = self[obj.relations]
        element.event(**kwargs)
        start, end, _ = obj.last()
        self._events[start:end] = element.uid

    def _remove(self, obj) -> None:
        """Remove ``obj`` together with its event intervals.

        Every interval tagged with ``obj.uid`` is dropped from the event
        tree before the removal is delegated to the base class.
        """
        # Collect the matches first so the tree is not mutated while it is
        # being iterated.
        stale = [interval for interval in self._events
                 if interval.data == obj.uid]
        for interval in stale:
            self._events.remove(interval)
        super()._remove(obj)

コード例 #18

ファイルを表示

ファイル: temporal.py プロジェクト: vishalbelsare/pathpy-1

class TemporalPathPyObject(PathPyObject):
    """Base class for a temporal object.

    Events are stored in an ``IntervalTree`` (``self._events``); the data of
    each interval is the dict of attributes passed to :meth:`event` for that
    time span.
    """
    def __init__(self, uid: Optional[str] = None, **kwargs: Any) -> None:
        """Initialize the temporal object."""

        # initialize the parent class
        super().__init__(uid=uid)

        # default start and end time of the object
        self._start = float('-inf')
        self._end = float('inf')

        # initialize an intervaltree to save events
        self._events = IntervalTree()

        # add new events
        self.event(**kwargs)

        # variable to store changes in the events
        self._len_events = len(self._events)

    def _yield_events(self, intervals):
        """Yield ``self`` once per interval, in sorted interval order.

        Before each yield ``self._attributes`` is replaced by the interval's
        data extended with 'start' and 'end' keys.  Once all intervals have
        been consumed the two keys are removed again.
        """
        for start, end, attributes in sorted(intervals):
            self._attributes = {'start': start, 'end': end, **attributes}
            yield self
        self._attributes.pop('start', None)
        self._attributes.pop('end', None)

    def __iter__(self):
        """Iterate over all events, yielding ``self`` in each event's state."""
        self._clean_events()

        # create generator
        yield from self._yield_events(self._events)

    @singledispatchmethod
    def __getitem__(self, key: Any) -> Any:
        """Return attribute ``key`` from the interval given by ``last()``.

        Returns ``None`` if that interval's data has no such key.
        """
        self._clean_events()
        # get the last element
        _, _, last = self.last()
        return last.get(key, None)

    @__getitem__.register(tuple)  # type: ignore
    def _(self, key: tuple) -> Any:
        """Return attributes for a ``(time, name)`` or ``(time,)`` key.

        The dicts of all intervals selected by ``key[0]`` are merged in
        sorted interval order (later intervals override earlier ones).  With
        a two-element key the value stored under ``key[1]`` (or ``None``) is
        returned, otherwise the whole merged dict.
        """
        start, end, _ = _get_start_end(key[0])
        values = {
            k: v
            for _, _, o in sorted(self._events[start:end])
            for k, v in o.items()
        }
        return values.get(key[1], None) if len(key) == 2 else values

    @__getitem__.register(slice)  # type: ignore
    @__getitem__.register(int)  # type: ignore
    @__getitem__.register(float)  # type: ignore
    def _(self, key: Union[int, float, slice]) -> Any:
        """Yield ``self`` in the state of each event selected by ``key``."""
        start, end, _ = _get_start_end(key)
        self._clean_events()

        # create generator
        yield from self._yield_events(self._events[start:end])

    @singledispatchmethod
    def __setitem__(self, key: Any, value: Any) -> None:
        """Set ``key`` to ``value`` over the full span of existing events."""
        self.event(start=self._events.begin(),
                   end=self._events.end(),
                   **{key: value})

    @__setitem__.register(tuple)  # type: ignore
    def _(self, key: tuple, value: Any) -> None:
        """Set ``key[1]`` to ``value`` for the time range given by ``key[0]``."""
        start, end, _ = _get_start_end(key[0])
        self.event(start=start, end=end, **{key[1]: value})

    @property
    def start(self):
        """start of the object"""
        return self.attributes.get('start', self._start)

    @property
    def end(self):
        """end of the object"""
        return self.attributes.get('end', self._end)

    def _clean_events(self):
        """Helper function to clean events.

        Only does work when the number of stored intervals differs from the
        count recorded at the last clean-up.
        """

        # BUG: There is a bug in the intervaltree library
        # merge_equals switches old and new data randomly
        def reducer(old, new):
            return {**old, **new}

        if len(self._events) != self._len_events:
            # split overlapping intervals
            self._events.split_overlaps()

            # combine the dict of the overlapping intervals
            self._events.merge_equals(data_reducer=reducer)

            # update the length of the events
            self._len_events = len(self._events)

    def event(self, *args, **kwargs) -> None:
        """Add a temporal event.

        With ``active=True`` (the default) the remaining keyword arguments
        are stored as the data of the interval ``[start, end)`` and become
        the current attributes.  With ``active=False`` the range is chopped
        out of the event tree instead.
        """

        # check if object is active or inactive
        active = kwargs.pop('active', True)

        # get start and end time of the event
        start, end, kwargs = _get_start_end(*args, **kwargs)

        if active:
            self._events[start:end] = kwargs  # type: ignore
            self._attributes = kwargs.copy()
        else:
            self._events.chop(start, end)

        # update start and end times
        self._start = self._events.begin()
        self._end = self._events.end()

    def last(self):
        """Return ``(begin, end, data)`` of the interval that sorts last.

        Note that this is the greatest interval in sort order, which is not
        necessarily the one that was added most recently.

        Raises:
            IndexError: if no events are stored.
        """
        interval = sorted(self._events)[-1]
        return interval.begin, interval.end, interval.data