def dilate(self, window, axis=None):
    """Grow (or shrink) every interval in the set along one axis.

    Args:
        window (Number): Amount by which each end-point along the axis is
            pushed outwards, so the extent of an interval changes by
            ``2 * window``. Pass a negative number to shrink intervals.
        axis (optional): The axis to dilate on, given as a pair of
            co-ordinate names such as ``('t1', 't2')``. When ``None``
            (the default), the ``primary_axis`` of ``self`` is used.

    Returns:
        A new IntervalSet with the dilated intervals.
    """
    if axis is None:
        axis = self._primary_axis

    def widen(intrvl):
        # Work on a copy so the bounds of the source interval are untouched.
        widened = intrvl['bounds'].copy()
        widened[axis[0]] -= window
        widened[axis[1]] += window
        return Interval(widened, intrvl['payload'])

    return self.map(widen)
def map_output(intrvlself, intrvlothers):
    """Nest the matching intervals of ``intrvlothers`` under ``intrvlself``.

    An interval of ``intrvlothers`` matches when
    ``predicate(intrvlself, other)`` is truthy.

    Returns:
        A one-element list holding an interval with a copy of
        ``intrvlself``'s bounds and the payload
        ``(intrvlself payload, IntervalSet of matches)``; or an empty list
        when nothing matched and ``filter_empty`` is set.
    """
    matches = IntervalSet(
        [other for other in intrvlothers if predicate(intrvlself, other)])
    if matches.empty() and filter_empty:
        return []
    return [
        Interval(intrvlself['bounds'].copy(),
                 (intrvlself['payload'], matches))
    ]
def compute_difference(intrvl, overlapped_intervals):
    """Return the pieces of ``intrvl`` left after removing the overlaps.

    Sweeps a cursor along ``axis`` from the start of ``intrvl`` to its end,
    emitting one interval for every stretch that no element of
    ``overlapped_intervals`` covers. Each emitted piece carries a copy of
    ``intrvl``'s bounds (with the two ``axis`` co-ordinates replaced) and
    ``intrvl``'s payload.

    Expects ``overlapped_intervals`` to be sorted by (axis[0], axis[1]).
    """
    lo_key, hi_key = axis[0], axis[1]
    cursor = intrvl[lo_key]
    scan_from = 0
    remainder = []
    while cursor < intrvl[hi_key]:
        # Each pass proposes `cursor` as the start of a surviving piece.
        # It is a valid start only if no overlap straddles it.
        covering = []
        following = None
        next_scan_from = None
        for position, candidate in enumerate(
                overlapped_intervals[scan_from:], start=scan_from):
            cand_lo = candidate[lo_key]
            cand_hi = candidate[hi_key]
            if next_scan_from is None and cand_hi > cursor:
                # First overlap that still reaches past the cursor; later
                # passes can skip everything before it.
                next_scan_from = position
            if cand_lo <= cursor and cand_hi > cursor:
                covering.append(candidate)
            elif cand_lo > cursor:
                # Input is sorted, so nothing further can straddle cursor.
                following = candidate
                break
        if covering:
            # Cursor lies inside an overlap: jump past everything covering it.
            cursor = max(c[hi_key] for c in covering)
        else:
            # Cursor is a valid start; the piece runs to the next overlap,
            # or to the end of intrvl when there is none.
            if following is None:
                piece_end = intrvl[hi_key]
                resume_at = piece_end
            else:
                piece_end = following[lo_key]
                resume_at = following[hi_key]
            if piece_end > cursor:
                piece_bounds = intrvl['bounds'].copy()
                piece_bounds[lo_key] = cursor
                piece_bounds[hi_key] = piece_end
                remainder.append(Interval(piece_bounds, intrvl['payload']))
            cursor = resume_at
        if next_scan_from is not None:
            scan_from = next_scan_from
    return remainder
def from_iterable(cls,
                  iterable,
                  key_parser,
                  bounds_parser,
                  payload_parser=lambda _: None,
                  progress=False,
                  total=None):
    """Build an IntervalSetMapping by grouping the elements of an iterable.

    Args:
        iterable: An iterable of arbitrary elements; each element becomes
            one interval in the collection.
        key_parser: Function mapping an element to the key its interval is
            grouped under.
        bounds_parser: Function mapping an element to the bounds of its
            interval.
        payload_parser (optional): Function mapping an element to the
            payload of its interval. Defaults to producing None for every
            element.
        progress (Bool, optional): Whether to wrap the iteration in a tqdm
            progress bar. Defaults to False.
        total (int, optional): Total number of elements in iterable. Only
            passed on to tqdm, and only when progress is True.

    Returns:
        A IntervalSetMapping constructed from iterable and the parsers
        provided.

    Note:
        Everything in iterable will be materialized in RAM.
    """
    rows = iterable
    if progress:
        if total is not None:
            rows = tqdm(iterable, total=total)
        else:
            rows = tqdm(iterable)

    grouped = {}
    for row in rows:
        interval = Interval(bounds_parser(row), payload_parser(row))
        grouped.setdefault(key_parser(row), []).append(interval)

    return cls({
        key: IntervalSet(members)
        for key, members in grouped.items()
    })
def coalesce(self,
             axis,
             bounds_merge_op,
             payload_merge_op=lambda p1, p2: p1,
             predicate=None,
             epsilon=0):
    """Recursively merge all intervals that are touching or overlapping
    along ``axis``.

    Merge intervals in self if they meet, overlap, or are up to ``epsilon``
    apart along ``axis``. If a predicate is specified, intervals will be
    merged only if they meet/overlap and also satisfy the predicate. The
    process repeats until all such intervals are merged. Bounds are merged
    with ``bounds_merge_op`` and payloads with ``payload_merge_op``.

    Args:
        axis: The axis to coalesce on, as a pair of co-ordinate names.
        bounds_merge_op: A function that takes two bounds and returns a
            merged version of both of them. Along ``axis``, this function
            should return a bound that spans the two bounds.
        payload_merge_op (optional): A function that takes in two payloads
            and merges them. Defaults to a function that returns the first
            of the two payloads.
        predicate (optional): A function that takes an interval that is
            currently being coalesced and a new interval and returns
            whether or not the two intervals should be merged. If several
            in-progress intervals satisfy it, the new interval is merged
            into the last of them; if none does, the new interval starts
            its own coalescing interval.
        epsilon (optional): The slack for judging if Intervals meet or
            overlap. Must be nonnegative. Defaults to 0 (no slack).

    Returns:
        An IntervalSet of the coalesced intervals. Without a predicate
        these are intended to be disjoint along ``axis`` and at least
        ``epsilon`` apart. If the set is empty, ``self`` is returned
        unchanged.
    """
    if len(self._intrvls) == 0:
        return self

    # The "close enough to merge" test depends only on axis and epsilon, so
    # build it once instead of once per (cur, intrvl) pair.
    can_merge = Bounds.cast({
        axis[0]: 't1',
        axis[1]: 't2'
    })(or_pred(overlaps(), before(max_dist=epsilon)))

    finished = []
    # Intervals that may still absorb upcoming intervals.
    active = []

    # sorted() already returns a new list; no defensive copy is needed.
    ordered = sorted(self._intrvls,
                     key=lambda intrvl: (intrvl[axis[0]], intrvl[axis[1]]))

    for intrvl in ordered:
        # Retire every active interval that intrvl can no longer reach.
        still_active = []
        for cur in active:
            if can_merge(cur, intrvl):
                still_active.append(cur)
            else:
                finished.append(cur)
        active = still_active

        # Nothing in reach: intrvl starts a new coalescing interval.
        if len(active) == 0:
            active.append(intrvl.copy())
            continue

        matched = None
        loc = len(active) - 1
        if predicate is None:
            matched = active[-1]
        else:
            # No early exit: the last interval satisfying the predicate wins.
            for index, cur in enumerate(active):
                if predicate(cur, intrvl):
                    matched = cur
                    loc = index

        if matched is None:
            # In reach of something, but the predicate rejected every
            # candidate, so intrvl starts its own coalescing interval.
            active.append(intrvl)
        else:
            active[loc] = Interval(
                bounds_merge_op(matched['bounds'], intrvl['bounds']),
                payload_merge_op(matched['payload'], intrvl['payload']))

    finished.extend(active)
    return IntervalSet(finished)
def merge_fn(key, intervals):
    """Wrap ``intervals`` as the payload of a single interval.

    The bounds are a copy of ``output_bounds`` whose two ``axis``
    co-ordinates are replaced by ``key[0]`` and ``key[1]``.
    """
    spanned = output_bounds.copy()
    spanned[axis[0]], spanned[axis[1]] = key[0], key[1]
    return Interval(spanned, intervals)
def map_fn(intrvl):
    """Return an interval with ``fn`` applied to the payload of ``intrvl``.

    The bounds object is passed through as-is (not copied).
    """
    bounds = intrvl['bounds']
    new_payload = fn(intrvl['payload'])
    return Interval(bounds, new_payload)
def coalesce(self,
             axis,
             bounds_merge_op,
             payload_merge_op=lambda p1, p2: p1,
             predicate=None,
             epsilon=0):
    """Recursively merge all intervals that are touching or overlapping
    along ``axis``.

    Merge intervals in self if they meet, overlap, or are up to ``epsilon``
    apart along ``axis``. Repeat the process until all such intervals are
    merged. Merges the bounds with ``bounds_merge_op`` and merges payloads
    with ``payload_merge_op``.

    Args:
        axis: The axis to coalesce on, as a pair of co-ordinate names.
        bounds_merge_op: A function that takes two bounds and returns a
            merged version of both of them. Along ``axis``, this function
            should return a bound that spans the two bounds.
        payload_merge_op (optional): A function that takes in two payloads
            and merges them. Defaults to a function that returns the first
            of the two payloads.
        predicate (optional): A function that takes an interval currently
            being coalesced and a new interval and returns whether the two
            should be merged. When given, a new interval is merged into the
            last in-progress interval that satisfies it, or starts its own
            coalescing interval if none does. When ``None`` (the default),
            the new interval is merged into the most recent in-progress
            interval within reach.
        epsilon (optional): The slack for judging if Intervals meet or
            overlap. Must be nonnegative. Defaults to 0 (no slack).

    Returns:
        An IntervalSet of the coalesced intervals. Without a predicate
        these are intended to be disjoint along ``axis`` and at least
        ``epsilon`` apart. If the set is empty, ``self`` is returned
        unchanged.
    """
    if len(self._intrvls) == 0:
        return self

    # Build the reachability test once; it depends only on axis and epsilon.
    within_reach = Bounds.cast({
        axis[0]: 't1',
        axis[1]: 't2'
    })(or_pred(overlaps(), before(max_dist=epsilon)))

    # The sweep below is only correct when intervals arrive ordered along
    # ``axis``. The sort is stable, so input that is already in this order
    # is processed in exactly the same sequence as before.
    ordered = sorted(self._intrvls,
                     key=lambda intrvl: (intrvl[axis[0]], intrvl[axis[1]]))

    new_coalesced_intrvls = []
    current_intrvls = []
    for intrvl in ordered:
        # Single pass: each in-progress interval is tested once and either
        # kept (still within reach of intrvl) or retired to the output.
        kept = []
        for cur in current_intrvls:
            if within_reach(cur, intrvl):
                kept.append(cur)
            else:
                new_coalesced_intrvls.append(cur)
        current_intrvls = kept

        # Nothing within reach: intrvl starts a new coalescing interval.
        if len(current_intrvls) == 0:
            current_intrvls.append(intrvl.copy())
            continue

        matched_intrvl = None
        loc = len(current_intrvls) - 1
        if predicate is None:
            matched_intrvl = current_intrvls[-1]
        else:
            # No early exit: the last interval satisfying the predicate wins.
            for index, cur in enumerate(current_intrvls):
                if predicate(cur, intrvl):
                    matched_intrvl = cur
                    loc = index

        if matched_intrvl is None:
            current_intrvls.append(intrvl)
        else:
            current_intrvls[loc] = Interval(
                bounds_merge_op(matched_intrvl['bounds'], intrvl['bounds']),
                payload_merge_op(matched_intrvl['payload'],
                                 intrvl['payload']))

    new_coalesced_intrvls.extend(current_intrvls)
    return IntervalSet(new_coalesced_intrvls)