예제 #1
0
    def dilate(self, window, axis=None):
        """Grow (or shrink) every interval in the set along one axis.

        Args:
            window (Number): Amount subtracted from the lower end-point and
                added to the upper end-point, so each interval's extent
                changes by ``2 * window``. A negative value shrinks the
                intervals.
            axis (optional): Pair of co-ordinate names to dilate on, such as
                ``('t1', 't2')``. Defaults to ``None``, meaning the
                ``primary_axis`` of ``self`` is used.

        Returns:
            Whatever ``self.map`` returns for the dilating function; per the
            original documentation, a new IntervalSet with the dilated
            intervals.
        """
        chosen_axis = self._primary_axis if axis is None else axis

        def widen(intrvl):
            # Work on a copy so the source interval's bounds stay untouched.
            grown = intrvl['bounds'].copy()
            grown[chosen_axis[0]] -= window
            grown[chosen_axis[1]] += window
            return Interval(grown, intrvl['payload'])

        return self.map(widen)
예제 #2
0
 def map_output(intrvlself, intrvlothers):
     """Nest the intervals from ``intrvlothers`` that match ``intrvlself``.

     ``predicate`` and ``filter_empty`` are taken from the enclosing scope.

     Args:
         intrvlself: The interval whose bounds and payload are kept.
         intrvlothers: Iterable of candidate intervals to nest under it.

     Returns:
         A one-element list holding a new Interval with a copy of
         ``intrvlself``'s bounds and the payload
         ``(original payload, IntervalSet of matches)``; or an empty list
         when nothing matched and ``filter_empty`` is truthy.
     """
     matches = [
         candidate for candidate in intrvlothers
         if predicate(intrvlself, candidate)
     ]
     nested = IntervalSet(matches)

     # Drop this interval only when it has no matches AND empties are filtered.
     if nested.empty() and filter_empty:
         return []

     bounds_copy = intrvlself['bounds'].copy()
     nested_payload = (intrvlself['payload'], nested)
     return [Interval(bounds_copy, nested_payload)]
예제 #3
0
        def compute_difference(intrvl, overlapped_intervals):
            """Return what is left of intrvl after removing overlapped_intervals.

            Sweeps a candidate ``start`` point from ``intrvl[axis[0]]`` towards
            ``intrvl[axis[1]]`` and emits every stretch that no interval in
            ``overlapped_intervals`` covers. ``axis`` is not a parameter; it
            is read from the enclosing scope.

            Args:
                intrvl: The interval to subtract from. It is indexed by the
                    two axis names as well as by ``'bounds'`` and
                    ``'payload'``.
                overlapped_intervals: Sliceable sequence of intervals to
                    subtract. Expected to be sorted by (axis[0], axis[1]).

            Returns:
                A list of new Intervals. Each one carries a copy of
                ``intrvl['bounds']`` with the two ``axis`` end-points
                replaced, and ``intrvl['payload']`` as its payload. The list
                is empty when nothing is left.
            """
            start = intrvl[axis[0]]
            # Index of the first overlapped interval that can still matter;
            # everything before it ends at or before `start`.
            overlapped_index = 0
            output = []
            while start < intrvl[axis[1]]:
                # Each iteration proposes an interval starting at `start`.
                # If no overlapped interval goes across `start`, then it is
                # a valid start for an interval after the subtraction.
                intervals_across_start = []
                first_interval_after_start = None
                new_overlapped_index = None
                for idx, overlap in enumerate(
                        overlapped_intervals[overlapped_index:]):
                    v1 = overlap[axis[0]]
                    v2 = overlap[axis[1]]
                    # Remember the first interval that ends after `start`, so
                    # the next scan can skip the ones that end earlier.
                    # `idx` is relative to the slice, hence the offset.
                    if new_overlapped_index is None and v2 > start:
                        new_overlapped_index = idx + overlapped_index
                    if v1 <= start and v2 > start:
                        # `overlap` covers the proposed start point.
                        intervals_across_start.append(overlap)
                    elif v1 > start:
                        # Input is sorted by (axis[0], axis[1]), so no later
                        # element can begin at or before `start`: stop here.
                        first_interval_after_start = overlap
                        break
                if len(intervals_across_start) == 0:
                    # `start` is valid; now find the matching end point.
                    if first_interval_after_start is None:
                        # Nothing begins after `start`: the remainder of
                        # intrvl survives, and the sweep ends.
                        end = intrvl[axis[1]]
                        new_start = end
                    else:
                        # The surviving piece stops where the next overlapped
                        # interval begins; resume the sweep at its end.
                        # NOTE(review): `end` is not clamped to
                        # intrvl[axis[1]]; presumably callers only pass
                        # intervals that overlap intrvl -- confirm.
                        end = first_interval_after_start[axis[0]]
                        new_start = first_interval_after_start[axis[1]]
                    # Only emit pieces with positive length.
                    if end > start:
                        new_bounds = intrvl['bounds'].copy()
                        new_bounds[axis[0]] = start
                        new_bounds[axis[1]] = end
                        output.append(Interval(new_bounds, intrvl['payload']))
                    start = new_start
                else:
                    # `start` is covered; jump to the furthest end point among
                    # the intervals covering it and propose that instead.
                    start = max([i[axis[1]] for i in intervals_across_start])
                if new_overlapped_index is not None:
                    overlapped_index = new_overlapped_index
            return output
예제 #4
0
    def from_iterable(cls,
                      iterable,
                      key_parser,
                      bounds_parser,
                      payload_parser=lambda _: None,
                      progress=False,
                      total=None):
        """Build an IntervalSetMapping by parsing each element of an iterable.

        Every element yields one interval, filed under the key that
        ``key_parser`` returns for it.

        Args:
            iterable: An iterable of arbitrary elements.
            key_parser: Callable mapping an element to the key its interval
                is grouped under.
            bounds_parser: Callable mapping an element to the bounds of its
                interval.
            payload_parser (optional): Callable mapping an element to the
                payload of its interval. Defaults to a function that
                returns None for every element.
            progress (Bool, optional): Whether to wrap the iteration in a
                tqdm progress bar. Defaults to False.
            total (int, optional): Total number of elements in iterable.
                Passed to tqdm only when ``progress`` is true and ``total``
                is not None; ignored otherwise.

        Returns:
            ``cls`` called with a dict that maps each key to an IntervalSet
            of the intervals parsed for that key.

        Note:
            Everything in iterable will be materialized in RAM.
        """
        # Pick the row source up front; tqdm is only touched when requested.
        if not progress:
            rows = iterable
        elif total is None:
            rows = tqdm(iterable)
        else:
            rows = tqdm(iterable, total=total)

        grouped = {}
        for element in rows:
            parsed = Interval(bounds_parser(element), payload_parser(element))
            grouped.setdefault(key_parser(element), []).append(parsed)

        return cls({
            group_key: IntervalSet(members)
            for group_key, members in grouped.items()
        })
예제 #5
0
    def coalesce(self,
                 axis,
                 bounds_merge_op,
                 payload_merge_op=lambda p1, p2: p1,
                 predicate=None,
                 epsilon=0):
        """Recursively merge all intervals that are touching or overlapping
        along ``axis``.

        Merge intervals in self if they meet, overlap, or are up to ``epsilon``
        apart along ``axis``. If a predicate is specified, intervals are only
        merged if they additionally satisfy the predicate. Repeat the process
        until all such intervals are merged.

        The intervals are processed in order of ``(axis[0], axis[1])``. A set
        of "open" intervals is kept; an open interval is finalized as soon as
        the next input interval neither overlaps it nor follows it within
        ``epsilon``.

        Merges the bounds with ``bounds_merge_op`` and merges payloads with
        ``payload_merge_op``.

        Args:
            axis: The axis to coalesce on, as a pair of co-ordinate names.
            bounds_merge_op: A function that takes two bounds and returns a
                merged version of both of them. Along ``axis``, this function
                should return a bound that spans the two bounds.
            payload_merge_op (optional): A function that takes in two payloads
                and merges them. Defaults to a function that returns the first
                of the two payloads.
            predicate (optional): A function that takes an interval that is
                currently being coalesced and a new interval and returns
                whether or not the two intervals should be merged. When
                several open intervals satisfy it, the new interval is merged
                into the last one that does. When omitted, the new interval
                is merged into the last open interval.
            epsilon (optional): The slack for judging if Intervals meet or
                overlap. Must be nonnegative. Defaults to 0 (no slack).

        Returns:
            A new IntervalSet holding the coalesced intervals. Without a
            ``predicate`` these are disjoint along ``axis`` and at least
            ``epsilon`` apart. If ``self`` holds no intervals, ``self`` itself
            is returned.
        """
        if len(self._intrvls) == 0:
            return self

        # The test "cur overlaps intrvl, or ends at most epsilon before it"
        # depends only on axis and epsilon, so build it once for the sweep.
        is_still_open = Bounds.cast({
            axis[0]: 't1',
            axis[1]: 't2'
        })(or_pred(overlaps(), before(max_dist=epsilon)))

        new_coalesced_intrvls = []

        # Intervals that may still absorb upcoming input intervals.
        current_intrvls = []

        # sorted() already returns a fresh list; self._intrvls is not mutated.
        sorted_intervals = sorted(
            self._intrvls,
            key=lambda intrvl: (intrvl[axis[0]], intrvl[axis[1]]))

        for intrvl in sorted_intervals:
            # Split the open intervals into those still reachable from
            # intrvl and those that are finished.
            still_open = []
            for cur in current_intrvls:
                if is_still_open(cur, intrvl):
                    still_open.append(cur)
                else:
                    new_coalesced_intrvls.append(cur)
            current_intrvls = still_open

            # Nothing to merge with: intrvl starts a new coalesced interval.
            if len(current_intrvls) == 0:
                current_intrvls.append(intrvl.copy())
                continue

            if predicate is None:
                loc = len(current_intrvls) - 1
            else:
                # No early exit on purpose: the last matching interval wins.
                loc = None
                for index, cur in enumerate(current_intrvls):
                    if predicate(cur, intrvl):
                        loc = index

            if loc is None:
                # No open interval accepts intrvl, so it opens a new one.
                # Copied like the empty case above so the result never
                # shares Interval objects with self.
                current_intrvls.append(intrvl.copy())
            else:
                matched_intrvl = current_intrvls[loc]
                current_intrvls[loc] = Interval(
                    bounds_merge_op(matched_intrvl['bounds'],
                                    intrvl['bounds']),
                    payload_merge_op(matched_intrvl['payload'],
                                     intrvl['payload']))

        # Whatever is still open at the end is finished too.
        new_coalesced_intrvls.extend(current_intrvls)

        return IntervalSet(new_coalesced_intrvls)
예제 #6
0
 def merge_fn(key, intervals):
     """Wrap ``intervals`` in one Interval whose axis end-points come from key.

     ``output_bounds`` and ``axis`` are taken from the enclosing scope.

     Args:
         key: Indexable whose first two items become the end-points along
             ``axis``.
         intervals: Stored unchanged as the payload of the new Interval.

     Returns:
         An Interval built on a copy of ``output_bounds`` with
         ``axis[0]``/``axis[1]`` set to ``key[0]``/``key[1]``.
     """
     group_start, group_end = key[0], key[1]
     merged_bounds = output_bounds.copy()
     merged_bounds[axis[0]] = group_start
     merged_bounds[axis[1]] = group_end
     return Interval(merged_bounds, intervals)
예제 #7
0
 def map_fn(intrvl):
     """Return an Interval with the same bounds and a transformed payload.

     ``fn`` is taken from the enclosing scope and is applied to the payload.
     The bounds object is passed through as-is (it is not copied).
     """
     bounds = intrvl['bounds']
     transformed_payload = fn(intrvl['payload'])
     return Interval(bounds, transformed_payload)
예제 #8
0
    def coalesce(self,
                 axis,
                 bounds_merge_op,
                 payload_merge_op=lambda p1, p2: p1,
                 predicate=None,
                 epsilon=0):
        """Recursively merge all intervals that are touching or overlapping
        along ``axis``.

        Merge intervals in self if they meet, overlap, or are up to ``epsilon``
        apart along ``axis``. Repeat the process until all such intervals are
        merged.

        The intervals are visited in order of ``(axis[0], axis[1])``; the
        sweep below finalizes an interval as soon as the next one is out of
        reach, which is only correct when the input is ordered along ``axis``.

        Merges the bounds with ``bounds_merge_op`` and merges payloads with
        ``payload_merge_op``.

        Args:
            axis: The axis to coalesce on, as a pair of co-ordinate names.
            bounds_merge_op: A function that takes two bounds and returns a
                merged version of both of them. Along ``axis``, this function
                should return a bound that spans the two bounds.
            payload_merge_op (optional): A function that takes in two payloads
                and merges them. Defaults to a function that returns the first
                of the two payloads.
            predicate (optional): A function that takes an interval currently
                being coalesced and a new interval, and returns whether the
                two should be merged. When several candidates satisfy it, the
                last one is used. When omitted, the new interval is merged
                into the last candidate.
            epsilon (optional): The slack for judging if Intervals meet or
                overlap. Must be nonnegative. Defaults to 0 (no slack).

        Returns:
            A new IntervalSet holding the coalesced intervals. Without a
            ``predicate`` these are disjoint along ``axis`` and at least
            ``epsilon`` apart. If ``self`` holds no intervals, ``self`` itself
            is returned.
        """
        if len(self._intrvls) == 0:
            return self

        # Loop-invariant: "cur overlaps intrvl, or ends at most epsilon
        # before it". Built once instead of twice per pair of intervals.
        within_reach = Bounds.cast({
            axis[0]: 't1',
            axis[1]: 't2'
        })(or_pred(overlaps(), before(max_dist=epsilon)))

        new_coalesced_intrvls = []
        # Intervals that may still absorb upcoming input intervals.
        current_intrvls = []

        # Order along the coalescing axis, which need not be the order in
        # which self stores its intervals. sorted() leaves self untouched.
        ordered_intrvls = sorted(
            self._intrvls,
            key=lambda intrvl: (intrvl[axis[0]], intrvl[axis[1]]))

        for intrvl in ordered_intrvls:
            # One pass: keep the candidates intrvl can still reach, and
            # finalize the ones it has moved past.
            reachable = []
            for cur in current_intrvls:
                if within_reach(cur, intrvl):
                    reachable.append(cur)
                else:
                    new_coalesced_intrvls.append(cur)
            current_intrvls = reachable

            # No candidate left: intrvl starts a new coalesced interval.
            if len(current_intrvls) == 0:
                current_intrvls.append(intrvl.copy())
                continue

            if predicate is None:
                loc = len(current_intrvls) - 1
            else:
                # No early exit on purpose: the last matching candidate wins.
                loc = None
                for index, cur in enumerate(current_intrvls):
                    if predicate(cur, intrvl):
                        loc = index

            if loc is None:
                # Rejected by every candidate, so intrvl becomes a candidate
                # itself. Copied like the empty case above so the result
                # never shares Interval objects with self.
                current_intrvls.append(intrvl.copy())
            else:
                matched_intrvl = current_intrvls[loc]
                current_intrvls[loc] = Interval(
                    bounds_merge_op(matched_intrvl['bounds'],
                                    intrvl['bounds']),
                    payload_merge_op(matched_intrvl['payload'],
                                     intrvl['payload']))

        # Candidates still open after the last input are finished as well.
        new_coalesced_intrvls.extend(current_intrvls)

        return IntervalSet(new_coalesced_intrvls)