def has_adjoining_neighbour_to_right(iv: Interval, tree: IntervalTree) -> bool:
    """Report whether ``tree`` holds an interval directly to the right of ``iv``.

    A ``True`` result means there is *no* gap between ``iv`` and its
    right-hand neighbour.

    Note: ``iv.end`` is used as-is (no ``+ 1``): the end bound is normally
    exclusive, but it is inclusive in overlap queries.
    """
    right_edge = iv.end
    return tree.overlaps(right_edge)
def test_overlaps_empty():
    """Check that an empty tree reports no overlap for any kind of query.

    Points, (begin, end) pairs and ``Interval`` objects are all tried,
    including zero-length and reversed ranges.
    """
    # Empty tree
    t = IntervalTree()

    for point in (-1, 0):
        assert not t.overlaps(point)

    bounds = (
        (-1, 1),
        (-1, 0),
        (0, 0),
        (0, 1),
        (1, 0),
        (1, -1),
        (0, -1),
    )
    # Same bounds, first as two positional arguments ...
    for begin, end in bounds:
        assert not t.overlaps(begin, end)
    # ... then wrapped in an Interval.
    for begin, end in bounds:
        assert not t.overlaps(Interval(begin, end))
def match_all(self, string):
    """Return every (begin, word) match of the trie in ``string``.

    When ``self.is_remove_overlaps`` is set, matches are filtered in the
    order the trie reports them: a match is kept only if its span
    ``[begin, begin + len(word))`` does not overlap a match kept earlier.
    """
    matches = _ACtrie.match_all(self.ac, string)
    if not self.is_remove_overlaps:
        return matches

    occupied = IntervalTree()
    kept = []
    for start, token in matches:
        stop = start + len(token)
        if occupied.overlaps(start, stop):
            continue
        occupied[start:stop] = True
        kept.append((start, token))
    return kept
class Day(object):
    """Free and booked time of one day, each kept in its own interval tree."""

    def __init__(self, start, end, dt):
        # dt is added to the end of every booking when free time is chopped
        self.dt = dt
        self.free = IntervalTree([get_iv(start, end)])
        self.booked = IntervalTree([])

    def is_free(self, interval):
        """True when ``interval`` overlaps free time and no existing booking."""
        in_free_time = self.free.overlaps(interval)
        return in_free_time and not self.booked.overlaps(interval)

    def schedule(self, interval):
        """Book ``interval``: chop it (plus ``dt``) out of free time and record it."""
        assert self.is_free(interval), "Attempt to double-book: {} - {}".format(
            m2t(interval.begin), m2t(interval.end))
        chop_end = interval.end + self.dt
        self.free.chop(interval.begin, chop_end)
        self.booked.add(interval)

    def dumps(self):
        """Render the bookings in sorted order, one tab-separated line each."""
        return ''.join(
            "\t{} - {}\t{}\n".format(m2t(booking.begin), m2t(booking.end), booking.data)
            for booking in sorted(self.booked)
        )
def _cgi_overlap(cgis, regions):
    """Split region values into CGI-overlapping, non-overlapping and valley groups.

    ``cgis`` supplies (begin, end) pairs in positions 0 and 1. For each region,
    positions 1 and 2 are the span that is tested, position 4 is the value that
    is collected and position 5 is a tag; regions tagged "VALLEY" are collected
    separately without an overlap test.

    Returns a tuple ``(overlapping, non_overlapping, valley)`` of lists.
    """
    cgi_tree = IntervalTree(Interval(cg[0], cg[1]) for cg in cgis)
    in_cgi = []
    outside_cgi = []
    valleys = []
    for region in regions:
        if region[5] == "VALLEY":
            valleys.append(region[4])
        elif cgi_tree.overlaps(region[1], region[2]):
            in_cgi.append(region[4])
        else:
            outside_cgi.append(region[4])
    return (in_cgi, outside_cgi, valleys)
def _cgi_overlap(cgis, regions):
    """Bucket the value of each region by its relation to the CGI intervals.

    A tree is built from positions 0 and 1 of every entry in ``cgis``. Each
    region then contributes its position-4 value to exactly one list:
    the valley list when position 5 equals "VALLEY", otherwise the CGI list
    when the span in positions 1..2 overlaps the tree, otherwise the non-CGI
    list.

    Returns ``(cgi_values, non_cgi_values, valley_values)``.
    """
    tree = IntervalTree(Interval(cg[0], cg[1]) for cg in cgis)
    cgi_values, non_cgi_values, valley_values = [], [], []
    for region in regions:
        # Pick the destination first, then append once.
        if region[5] == "VALLEY":
            bucket = valley_values
        elif tree.overlaps(region[1], region[2]):
            bucket = cgi_values
        else:
            bucket = non_cgi_values
        bucket.append(region[4])
    return (cgi_values, non_cgi_values, valley_values)
def sweep(binary, start_addresses: List[int], region_store=None):
    """Yield a ``Function`` for every function reachable from the start addresses.

    Args:
        binary: Either raw ``bytes`` or an object offering ``read()`` (whose
            result is converted to ``bytes``) and ``addr_is_valid(addr)``.
        start_addresses: Addresses at which to begin sweeping.
        region_store: Tree of regions that are already covered; addresses
            inside it are skipped. A fresh ``IntervalTree`` is used when omitted.

    Yields:
        ``Function`` objects built from each non-empty ``sweep_function`` result.
        The result is merged into ``region_store`` after the consumer resumes
        the generator.
    """
    if isinstance(binary, bytes):
        # Raw bytes are swept directly. Previously ``data`` was never bound on
        # this path, so the first sweep_function call raised UnboundLocalError.
        data = binary

        def addr_validator(_addr):
            # raw bytes carry no address validation; accept everything
            return True
    else:
        data = bytes(binary.read())
        addr_validator = binary.addr_is_valid

    if region_store is None:
        region_store = IntervalTree()

    # sweep_function may add newly discovered call targets to this set
    addr_stack = set(start_addresses)
    while addr_stack:
        addr = addr_stack.pop()
        if region_store.overlaps(addr) or not addr_validator(addr):
            continue
        function_parts = sweep_function(data, addr, addr_stack)
        if function_parts:
            fnc = Function(function_parts)
            yield fnc
            region_store |= function_parts
class Schedule():  #subclass of interval tree instead?
    '''Collects courses into a weekly interval tree and records time clashes.'''

    def __init__(self):
        self.week = IntervalTree()
        # each entry is a list of courses that clash with one another
        self.overlaps = []

    @staticmethod
    def get_abs_time(day, begin, end):
        '''Turn "H:M" begin/end strings into hours offset by ``Day[day].value``.'''
        #TODO: account for non-military time
        begin_parts = begin.split(":")
        end_parts = end.split(":")
        begin_hours = float(begin_parts[0]) + float(begin_parts[1]) / 60.0
        end_hours = float(end_parts[0]) + float(end_parts[1]) / 60.0
        day_offset = Day[day].value
        return {"begin": begin_hours + day_offset, "end": end_hours + day_offset}

    def add_course(self, course):
        '''Insert every meeting time of ``course``, noting clashes first.'''
        for day, meeting_times in course.times.items():
            for slot in meeting_times:  # slot is a list: [begin, end]
                span = Schedule.get_abs_time(day, slot[0], slot[1])
                lo, hi = span["begin"], span["end"]
                if self.week.overlaps(lo, hi):
                    # remember everything already in this window, plus the newcomer
                    clash = [entry.data for entry in self.week[lo:hi]]
                    clash.append(course)
                    self.overlaps.append(clash)
                self.week[lo:hi] = course

    def add_courses(self, *args):
        '''Add each given course in turn.'''
        for course in args:
            self.add_course(course)

    def get_possible_schedules(self):
        '''Return one tuple per combination of picks from the clash groups.

        Each tuple is one pick from every clash group followed by the data of
        all entries that appear in no clash group.
        '''
        #got this from a stack overflow solution. Need to adapt and figure out why it works
        def in_conflict(entry):
            return any(entry.data in group for group in self.overlaps)

        rest = tuple(entry.data for entry in self.week[:] if not in_conflict(entry))
        course_list = []
        for unique in product(*self.overlaps):
            course_list.append(unique + rest)
        return course_list

    def get_overlaps(self):
        '''Placeholder; not implemented yet.'''
        #print overlaps better?
        pass
class RegionFilter(Filter):
    """Filter that drops records overlapping any of the selected named regions.

    The regions named by ``--region_filter`` / ``-R`` are looked up in
    ``REGIONS`` and loaded into an interval tree. Calling the filter with a
    record returns the record when it lies outside all of them, otherwise
    ``None``.
    """

    # Class-level defaults; __init__ always replaces ``intervaltree`` and
    # replaces ``region_names`` whenever the argument is present.
    intervaltree = None
    region_names = []

    @classmethod
    def customize_parser(cls, parser: argparse.ArgumentParser):
        """Register the ``--region_filter`` / ``-R`` option on ``parser``."""
        parser.add_argument("--region_filter",
                            "-R",
                            action=RegionArgParser,
                            default=[])

    def __init__(self, args: argparse.Namespace) -> None:
        """Build the interval tree from ``args.region_filter``, if present."""
        super().__init__(args)
        self.intervaltree = IntervalTree()
        if hasattr(args, "region_filter"):
            self.region_names = args.region_filter
            for name in args.region_filter:
                for location in REGIONS[name].regions:
                    # convert to 0-based, half open coordinates
                    self.intervaltree.add(
                        Interval(location.start - 1, location.end))

    def __repr__(self):
        name = f"{self.__class__.__name__}"
        if self.region_names:
            name += " on " + ", ".join(self.region_names)
        else:
            name += " (inactive)"
        return name

    def __call__(self, record: Record) -> Union[Record, None]:
        """Return ``record`` if it misses every region, otherwise ``None``."""
        # NOTE(review): the tree is 0-based half-open; confirm affected_start /
        # affected_end use the same convention.
        if record.affected_end < record.affected_start:
            # this is an insert - 0 length feature
            inside = self.intervaltree.overlaps_point(record.affected_start)
        else:
            # SNV or MNV (del) - size 1 and above
            inside = self.intervaltree.overlaps(record.affected_start,
                                                record.affected_end)
        # kept as a named local so that it is easy to add debug code here
        return None if inside else record
def sweep_function(binary, start, addr_stack):
    """Disassemble one function from ``start`` and return its blocks as a tree.

    Branch targets found inside the function are queued on a local heap and
    disassembled in ascending address order. Call targets are added to
    ``addr_stack`` for the caller to sweep later.

    Args:
        binary: Sliceable byte buffer; ``binary[addr:]`` is handed to the
            disassembler together with ``addr``.
        start: Address of the first instruction.
        addr_stack: Set that receives the addresses of called functions.

    Returns:
        An ``IntervalTree`` mapping ``[start_addr, stop_addr)`` to the
        ``FunctionBlock`` covering it, with overlapping blocks merged via ``add``.
    """
    block_tree = IntervalTree()
    branch_stack = [start]
    heapify(branch_stack)
    while branch_stack:
        addr = heappop(branch_stack)
        part = FunctionBlock(start_addr=addr)
        for inst in THUMB_DISASSEMBLER.disasm(binary[addr:], addr):
            i_addr = inst.address
            if block_tree.overlaps(i_addr):
                # we already visited this address (maybe a loop?)
                break
            part.instructions += 1
            part.stop_addr = i_addr + inst.size
            ops = len(inst.operands)
            reg_reads, reg_writes = inst.regs_access()
            if inst.id in BRANCH_IDS:
                if ARM_REG_LR in reg_reads:
                    # A branch that reads LR is treated as a return.
                    # ARM_CC_AL is the "always" condition, so anything else is
                    # conditional. The original test was inverted and kept
                    # decoding past unconditional returns.
                    if inst.cc != ARM_CC_AL:
                        # conditional return: execution may fall through
                        continue
                    # unconditional return: the block ends here
                    break
                new_addr = parse_imm(inst.operands[0].imm)
                heappush(branch_stack, new_addr)
                if inst.cc == ARM_CC_AL:
                    # unconditional branch ends the block
                    part.branches += 1
                    break
                else:
                    part.conditional_branches += 1
            elif ops == 1 and inst.id in CALL_IDS:
                new_addr = parse_imm(inst.operands[0].imm)
                addr_stack.add(new_addr)
                part.calls.add(new_addr)
            elif ops == 2 and inst.id in COND_BRANCH_IDS:
                new_addr = parse_imm(inst.operands[1].imm)
                heappush(branch_stack, new_addr)
                part.conditional_branches += 1
            elif ARM_REG_PC in reg_writes:
                # assume return for any otherwise unmatched changes of PC
                break
        # only record blocks that decoded at least one instruction
        if part.start_addr < part.stop_addr:
            block_tree[part.start_addr:part.stop_addr] = part
    block_tree.merge_overlaps(add)
    return block_tree
class Ticketer():
    """Maps integer ranges to rule names using an interval tree."""

    def __init__(self):
        self.interval_tree = IntervalTree()

    def add_rule(self, line):
        """Parse ``"<rule>: a-b or c-d ..."`` and store each range under the rule.

        The upper bound in the text is inclusive, so ``end + 1`` is stored.
        """
        fields = line.rstrip().split(': ')
        rule_name = fields[0]
        for span in fields[1].split(' or '):
            bounds = span.split('-')
            low = int(bounds[0])
            high = int(bounds[1])
            self.interval_tree[low:high + 1] = rule_name

    def search(self, needle):
        """Return the tree's lookup result for ``needle``."""
        found = self.interval_tree[needle]
        return found

    def exists(self, needle):
        """Return whether any stored range overlaps ``needle``."""
        hit = self.interval_tree.overlaps(needle)
        return hit
def calculatePerBase(filenames, tss, sim_type, valid_chroms, minimum, maximum):
    """Count fragment midpoints around each TSS, split by fragment-length bin.

    Length bins of width 10 are created from ``minimum`` up to ``maximum`` and
    labelled ``"<lo>.<hi>"``. For every usable line of ``tss`` a window of
    +/-1000 around the position is scanned; each qualifying read-1 fragment
    adds one count at its midpoint, in the bin matching its length. For
    "-" strand entries the position is mirrored within the window.

    Args:
        filenames: Passed through to ``readIterator``.
        tss: Iterable of tab-separated lines: chromosome, position and an
            optional strand (defaults to "+").
        sim_type: When truthy, the text after ``'chr'`` in the chromosome name
            is used for the read lookup.
        valid_chroms: Lines whose chromosome is not in here are skipped.
        minimum: Lower bound of the first length bin.
        maximum: Exclusive stop for the bin starts (step 10).

    Returns:
        Dict mapping each bin label to a numpy array with one row of 2001
        counts per processed TSS line.
    """
    all_pos = {}
    tree = IntervalTree()
    vals = []
    intervals = []
    for i in range(minimum, maximum, 10):
        vals.append(i)
    # close the last bin
    # NOTE(review): reconstructed as running once after the loop (inside the
    # loop every other bin would be empty) - confirm against the original layout.
    vals.append(i + 10)
    for i in range(len(vals) - 1):
        label = "%s.%s" % (vals[i], vals[i + 1] - 1)
        # NOTE(review): the stored end is vals[i + 1] - 1; if the tree treats
        # the end as exclusive, that length falls in no bin - confirm intent.
        tree[vals[i]:vals[i + 1] - 1] = label
        all_pos[label] = []
        intervals.append(label)
    # if mode == "long":
    #     minLength = long_min
    #     maxLength = long_max
    # elif mode == "short":
    #     minLength = short_min
    #     maxLength = short_max
    for line in tss:
        posRange = {}
        split = line.rstrip('\n').split('\t')
        if split[0] not in valid_chroms:
            continue
        if sim_type:
            chrom = split[0].split('chr')[1]
        else:
            chrom = split[0]
        if len(split) >= 3:
            t_start, strand = int(split[1]), split[2]
        else:
            t_start = int(split[1])
            strand = "+"
        start = t_start - 1000
        if start < 0:
            start = 0
        end = t_start + 1000
        for interval in intervals:
            posRange[interval] = [0] * 2001
        for read in readIterator(filenames, chrom, start, end):
            if read.is_duplicate or read.is_qcfail or read.is_unmapped:
                continue
            if isSoftClipped(read.cigar):
                continue
            if read.is_paired:
                if read.mapq < 5:
                    continue
                if read.mate_is_unmapped:
                    continue
                if read.rnext != read.tid:
                    continue
                if read.is_read1:
                    if read.isize == 0:
                        continue
                    rstart = min(read.pos, read.pnext) + 1  # 1-based
                    rend = rstart + abs(read.isize) - 1  # end included
                    # Floor division keeps the midpoint an int so it can be
                    # used as a list index ("/" yields a float on Python 3).
                    rmid = (int(rend) + int(rstart)) // 2
                    rlength = rend - rstart + 1
                    if tree.overlaps(rlength):
                        interval = sorted(tree[rlength])[0].data
                        i = rmid
                        if start <= i <= end:
                            if strand == "+":
                                posRange[interval][i - start] += 1
                            if strand == "-":
                                posRange[interval][-i + end] += 1
        for interval in intervals:
            all_pos[interval].append(posRange[interval])
    for i in all_pos:
        all_pos[i] = np.array(all_pos[i])
    return all_pos
from intervaltree import IntervalTree, Interval

# Build a tree holding a single interval with begin=1 and end=120.
tree = IntervalTree()
tree.addi(1, 120)
print(tree)

# Query the tree with a second interval (110 to 130): first a yes/no overlap
# check, then a lookup of the stored entries by indexing with that interval.
testInterval = Interval(110, 130)
print(tree.overlaps(testInterval))
print(tree[testInterval])

# Merge overlapping intervals stored in the tree (the return value is not used).
tree.merge_overlaps()
def test_all():
    """Exercise an IntervalTree end to end on nine small sample intervals.

    The sections run in order and share the tree ``t``: queries, membership,
    insertion, duplicate handling, copying/casting, deletion, emptying, and
    finally ``remove_overlap`` / ``split_overlaps`` on freshly built trees.
    Later assertions rely on the mutations made by earlier sections.
    """
    from intervaltree import Interval, IntervalTree
    from pprint import pprint
    from operator import attrgetter

    def makeinterval(lst):
        # Interval whose data is the label "<begin>-<end>"
        return Interval(
            lst[0],
            lst[1],
            "{}-{}".format(*lst)
            )

    ivs = list(map(makeinterval, [
        [1,2],
        [4,7],
        [5,9],
        [6,10],
        [8,10],
        [8,15],
        [10,12],
        [12,14],
        [14,15],
        ]))
    t = IntervalTree(ivs)
    t.verify()

    def data(s):
        # set of the data labels of the intervals in s
        return set(map(attrgetter('data'), s))

    # Query tests
    print('Query tests...')
    assert data(t[4]) == set(['4-7'])
    assert data(t[4:5]) == set(['4-7'])
    assert data(t[4:6]) == set(['4-7', '5-9'])
    assert data(t[9]) == set(['6-10', '8-10', '8-15'])
    assert data(t[15]) == set()
    assert data(t.search(5)) == set(['4-7', '5-9'])
    assert data(t.search(6, 11, strict = True)) == set(['6-10', '8-10'])
    print(' passed')

    # Membership tests
    print('Membership tests...')
    assert ivs[1] in t
    assert Interval(1,3, '1-3') not in t
    assert t.overlaps(4)
    assert t.overlaps(9)
    assert not t.overlaps(15)
    assert t.overlaps(0,4)
    assert t.overlaps(1,2)
    assert t.overlaps(1,3)
    assert t.overlaps(8,15)
    assert not t.overlaps(15, 16)
    assert not t.overlaps(-1, 0)
    assert not t.overlaps(2,4)
    print(' passed')

    # Insertion tests
    print('Insertion tests...')
    t.add( makeinterval([1,2]) )  # adding duplicate should do nothing
    assert data(t[1]) == set(['1-2'])
    t[1:2] = '1-2'                # adding duplicate should do nothing
    assert data(t[1]) == set(['1-2'])
    t.add(makeinterval([2,4]))
    assert data(t[2]) == set(['2-4'])
    t.verify()
    t[13:15] = '13-15'
    assert data(t[14]) == set(['8-15', '13-15', '14-15'])
    t.verify()
    print(' passed')

    # Duplication tests
    # same range as an existing interval but different data: both must be kept
    print('Interval duplication tests...')
    t.add(Interval(14,15,'14-15####'))
    assert data(t[14]) == set(['8-15', '13-15', '14-15', '14-15####'])
    t.verify()
    print(' passed')

    # Copying and casting
    print('Tree copying and casting...')
    tcopy = IntervalTree(t)
    tcopy.verify()
    assert t == tcopy
    tlist = list(t)
    for iv in tlist:
        assert iv in t
    for iv in t:
        assert iv in tlist
    tset = set(t)
    assert tset == t.items()
    print(' passed')

    # Deletion tests
    # remove() of an interval that is not in the tree must raise ValueError ...
    print('Deletion tests...')
    try:
        t.remove( Interval(1,3, "Doesn't exist") )
    except ValueError:
        pass
    else:
        raise AssertionError("Expected ValueError")
    try:
        t.remove( Interval(500, 1000, "Doesn't exist") )
    except ValueError:
        pass
    else:
        raise AssertionError("Expected ValueError")
    orig = t.print_structure(True)  # result is not used afterwards
    # ... while discard() of the same intervals is called without a try block
    t.discard( Interval(1,3, "Doesn't exist") )
    t.discard( Interval(500, 1000, "Doesn't exist") )
    assert data(t[14]) == set(['8-15', '13-15', '14-15', '14-15####'])
    t.remove( Interval(14,15,'14-15####') )
    assert data(t[14]) == set(['8-15', '13-15', '14-15'])
    t.verify()
    assert data(t[2]) == set(['2-4'])
    t.discard( makeinterval([2,4]) )
    assert data(t[2]) == set()
    t.verify()
    assert t[14]
    t.remove_overlap(14)
    t.verify()
    assert not t[14]

    # Emptying the tree
    # remove the remaining intervals one by one, verifying after each removal
    #t.print_structure()
    for iv in sorted(iter(t)):
        #print('### Removing '+str(iv)+'... ###')
        t.remove(iv)
        #t.print_structure()
        t.verify()
        #print('')
    assert len(t) == 0
    assert t.is_empty()
    assert not t

    # rebuild from the original intervals and remove by overlap point
    t = IntervalTree(ivs)
    #t.print_structure()
    t.remove_overlap(1)
    #t.print_structure()
    t.verify()
    t.remove_overlap(8)
    #t.print_structure()
    print(' passed')

    # print the tree before and after split_overlaps() for visual inspection
    t = IntervalTree(ivs)
    pprint(t)
    t.split_overlaps()
    pprint(t)
    #import cPickle as pickle
    #p = pickle.dumps(t)
    #print(p)
class DateIntervalTree:
    """A thin wrapper around the intervaltree library that works with dates.

    The underlying tree stores integer ranges, so every date is converted to
    an integer with ``date.toordinal()``, which preserves ordering, and
    converted back when results are returned.

    Unless stated otherwise, intervals include their lower bound and exclude
    their upper bound: an interval from A to B contains A but not B.
    """

    def __init__(self):
        self.tree = IntervalTree()

    @staticmethod
    def to_date_interval(begin: date, end: date, data: Any) -> Interval:
        """Build the ordinal interval for a date range and its data (if any)."""
        lower = begin.toordinal()
        upper = end.toordinal()
        return Interval(lower, upper, data)

    @staticmethod
    def from_date_interval(ival: Interval) -> Interval:
        """Build the date interval for an ordinal interval, keeping its data."""
        first_day = date.fromordinal(ival.begin)
        last_day = date.fromordinal(ival.end)
        return Interval(first_day, last_day, ival.data)

    def add(self, begin: date, end: date, data: Any = None):
        """Store a date range, together with optional data, in the tree."""
        ordinal_ival = DateIntervalTree.to_date_interval(begin, end, data)
        self.tree.add(ordinal_ival)

    def merge_overlaps(self, reducer: Callable = None, strict: bool = True):
        """Merge overlapping date intervals held in the tree.

        ``reducer`` decides how the data of merged intervals is combined.
        ``strict`` controls "kissing" intervals: when true (the default) only
        intervals that really overlap are merged; when false, adjacent
        intervals are merged as well. Both are forwarded to the intervaltree
        ``merge_overlaps`` function; see its documentation for details.
        """
        self.tree.merge_overlaps(data_reducer=reducer, strict=strict)

    def intervals(self) -> List[Interval]:
        """Return every interval in the tree, expressed with date objects."""
        # stored values are ordinals; map each one back to dates
        return list(map(DateIntervalTree.from_date_interval, self.tree.items()))

    def overlaps(self, begin: date, end: date, strict: bool = True) -> bool:
        """Tell whether the date range overlaps anything in the tree.

        Upper bounds are exclusive in intervaltree, so
        2015-07-23 - 2015-08-21 does not overlap 2015-08-21 - 2015-09-21.
        With ``strict=False`` one day is added to ``end`` so that such
        single-day contacts count as overlaps.
        """
        effective_end = end if strict else end + timedelta(days=1)
        query = DateIntervalTree.to_date_interval(begin, effective_end, None)
        return self.tree.overlaps(query.begin, query.end)

    def range_query(self, begin: date, end: date) -> List[Interval]:
        """Return the intervals that strictly overlap the given date range."""
        query = DateIntervalTree.to_date_interval(begin, end, None)
        matches = self.tree.overlap(query.begin, query.end)
        return [DateIntervalTree.from_date_interval(hit) for hit in matches]

    def point_query(self, point: date) -> List[Interval]:
        """Return the intervals the tree reports at the given date."""
        matches = self.tree.at(point.toordinal())
        return [DateIntervalTree.from_date_interval(hit) for hit in matches]

    @staticmethod
    def shift_endpoints(date_tree: "DateIntervalTree") -> "DateIntervalTree":
        """Return a new tree in which touching intervals no longer share a boundary.

        Intervals are visited earliest to latest. When an interval ends exactly
        where the next one begins, its end is moved one day earlier, e.g.
        (1/1/2000, 1/10/2000), (1/10/2000, 1/20/2000) becomes
        (1/1/2000, 1/9/2000), (1/10/2000, 1/20/2000).
        """
        ordered = sorted(date_tree.intervals())
        result = DateIntervalTree()
        for idx, ival in enumerate(ordered):
            end = ival.end
            has_next = idx + 1 < len(ordered)
            if has_next and end == ordered[idx + 1].begin:
                end = end - timedelta(days=1)
            result.add(ival.begin, end, ival.data)
        return result

    @staticmethod
    def shift_endpoints_start(
            date_tree: "DateIntervalTree") -> "DateIntervalTree":
        """Return a new tree in which touching intervals no longer share a boundary.

        Intervals are visited latest to earliest. When an interval begins
        exactly where the preceding (earlier) one ends, its start is moved one
        day later, e.g. (1/1/2000, 1/10/2000), (1/10/2000, 1/20/2000) becomes
        (1/1/2000, 1/10/2000), (1/11/2000, 1/20/2000).
        """
        ordered = sorted(date_tree.intervals(), reverse=True)
        result = DateIntervalTree()
        for idx, ival in enumerate(ordered):
            begin = ival.begin
            has_earlier = idx + 1 < len(ordered)
            if has_earlier and begin == ordered[idx + 1].end:
                log.debug(
                    "adjusting start of billing period: %s-%s",
                    ival.begin,
                    ival.end,
                )
                begin = begin + timedelta(days=1)
            result.add(begin, ival.end, ival.data)
        return result

    @staticmethod
    def shift_endpoints_end(
            date_tree: "DateIntervalTree") -> "DateIntervalTree":
        """Return a new tree in which no interval reaches into the one after it.

        Intervals are visited latest to earliest. An interval's end is pulled
        back one day at a time until it lies before the start of the
        (already adjusted) later interval, e.g.
        (1/1/2000, 1/10/2000), (1/10/2000, 1/20/2000) becomes
        (1/1/2000, 1/9/2000), (1/10/2000, 1/20/2000).
        """
        result = DateIntervalTree()
        later_ival = None
        for ival in sorted(date_tree.intervals(), reverse=True):
            if later_ival:
                while ival.end >= later_ival.begin:
                    begin = ival.begin
                    end = ival.end - timedelta(days=1)
                    if begin == end:
                        # If new interval is one day long, shift start date back one day too.
                        begin = begin - timedelta(days=1)
                    ival = Interval(begin, end, ival.data)
            later_ival = ival
            result.add(ival.begin, ival.end, ival.data)
        return result
def has_adjoining_neighbour_to_left(iv: Interval, tree: IntervalTree) -> bool:
    """Report whether ``tree`` holds an interval directly to the left of ``iv``.

    The tree is queried at the point just before ``iv.begin``; a hit means
    there is *no* gap between ``iv`` and its left-hand neighbour.
    """
    point_before = iv.begin - 1
    return tree.overlaps(point_before)
limit=selectedChromosome, completely_within=False): geneId = feat.id assert (geneId != None) if (feat.end - feat.start < 1): continue transcribedRegions.addi(feat.start, feat.end, geneId) nonTranscribedRegions.chop(feat.start, feat.end) print("================ [", area, "] ================") a = [] b = [] for i in range(1, chromosomeBoundaries[1], chromosomeBoundaries[1] / 143): a.append("#" if codingRegions.overlaps(i) else "-") b.append("#" if nonCodingRegions.overlaps(i) else "-") assert (a[-1] != b[-1]) #print("".join(b)) #print(area, len(transcribedRegions), len(nonTranscribedRegions)) c = [] d = [] for i in range(1, chromosomeBoundaries[1], chromosomeBoundaries[1] / 143): c.append("#" if transcribedRegions.overlaps(i) else "-") d.append("#" if nonTranscribedRegions.overlaps(i) else "-") assert (c[-1] != d[-1]) # assert( non-coding | transcribed ) if (not ((a[len(c) - 1] == "-") | (c[-1] == "#"))): print("--------------------------------------------") print("Found coding-region anomaly")