def mergeSegments(self, segs1, segs2, ignoreInsideEnvelope=True):
    """Merge two segmentations of the same file into a single list.

    Every segment of ``segs1`` is stored first.  Each segment of ``segs2`` is
    then folded in according to how it relates to what is already stored:

    * no overlap: the segment is added unchanged;
    * it completely envelops stored segments and ``ignoreInsideEnvelope`` is
      true: the enveloped segments are dropped, and the new segment is
      widened to cover any remaining stored segment that sticks out past
      one of its ends (those segments are replaced by the widened one);
    * it completely envelops stored segments and ``ignoreInsideEnvelope`` is
      false: the (too large) new segment is discarded;
    * it only partially overlaps stored segments: a stored segment starting
      after the new one's start is stretched back to that start, and one
      ending before the new one's end is stretched forward to that end.
      Stored segments that contain the new one are left untouched.

    Args:
        segs1: iterable of segments, each indexable as ``seg[0]`` (start)
            and ``seg[1]`` (end).
        segs2: second iterable of segments in the same form.  The segments
            themselves are never modified.
        ignoreInsideEnvelope: selects between the two envelope policies
            described above.

    Returns:
        A list of ``[start, end]`` lists in the tree's iteration order,
        which is not guaranteed to be sorted.
    """
    from intervaltree import IntervalTree

    t = IntervalTree()

    # Put the first set into the tree
    for s in segs1:
        t[s[0]:s[1]] = s

    # Decide whether or not to put each segment in the second set in
    for s in segs2:
        # Work on local bounds so the caller's segment is not widened in place.
        start, end = s[0], s[1]

        overlaps = t.search(start, end)
        if len(overlaps) == 0:
            # Nothing in the way: add it as is.
            t[start:end] = s
            continue

        envelops = t.search(start, end, strict=True)
        if len(envelops) > 0:
            if ignoreInsideEnvelope:
                # Remove everything lying completely inside the new segment.
                t.remove_envelop(start, end)
                # Open out the region over whatever still overlaps.
                for o in t.search(start, end):
                    widened = False
                    if o.begin < start:
                        start = o.begin
                        widened = True
                    if o.end > end:
                        end = o.end
                        widened = True
                    # Remove exactly once: an interval that extends past both
                    # ends used to be removed twice, and the second remove()
                    # raises ValueError.
                    if widened:
                        t.remove(o)
                t[start:end] = [start, end]
            # Otherwise the new segment is the too-large one and is dropped.
        else:
            # Nothing is enveloped, so at most one of the two conditions can
            # hold for a given interval; widen the stored interval out to the
            # matching end of the new segment.
            for o in overlaps:
                if o.begin > start:
                    t[start:o.end] = (start, o.end)
                    t.remove(o)
                elif o.end < end:
                    t[o.begin:end] = (o.begin, end)
                    t.remove(o)

    return [[a[0], a[1]] for a in t]
def test_build_tree():
    """Replay the ``items`` trace against an IntervalTree and return the tree.

    The tree starts as the single interval ``[0, MAX)``.  For every
    ``(b, e, alloc)`` record in ``items``:

    * ``alloc`` truthy: exactly one stored interval must cover ``[b, e)``;
      it is removed and whatever is left of it on either side is re-inserted.
    * otherwise: ``[b, e)`` must not overlap anything stored; it is inserted
      after being merged with the interval touching it on the left and the
      interval touching it on the right, when present.

    The progress bar is advanced once per record, and the tree's ``verify()``
    is called before returning.
    """
    progress = ProgressBar(len(items))
    tree = IntervalTree()
    tree[0:MAX] = None

    for b, e, alloc in items:
        hits = tree[b:e]

        if not alloc:
            assert not hits

            # Coalesce with the neighbour ending at b, if there is one.
            left = tree[b - 1:b]
            assert len(left) in (0, 1)
            if left:
                left_iv = left.pop()
                b = left_iv.begin
                tree.remove(left_iv)

            # Coalesce with the neighbour starting at e, if there is one.
            right = tree[e:e + 1]
            assert len(right) in (0, 1)
            if right:
                right_iv = right.pop()
                e = right_iv.end
                tree.remove(right_iv)

            tree[b:e] = None
        else:
            assert len(hits) == 1
            covering = hits.pop()
            assert covering.begin <= b and e <= covering.end

            # Carve [b, e) out of the covering interval.
            tree.remove(covering)
            if covering.begin < b:
                tree[covering.begin:b] = None
            if e < covering.end:
                tree[e:covering.end] = None

        progress()

    tree.verify()
    return tree
class virtual: def __init__(self): #mapea id drawables con su respectivo drawable self.idToDrawable = {} self.idToInterval= {} self.tags = {} #contine pares (intervaloX,idDrawable) que representan helperBoxs de elementos en espacio virtual self.intervalTreeX = IntervalTree() self.vista = None self.currentLocalId = 0 self.stringTofunction = {} self.drawableInMemory=None self.logger = logging.getLogger(__name__) self.logger.setLevel(logging.DEBUG) fh = logging.FileHandler('virtualScreen.log') formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') fh.setFormatter(formatter) self.logger.addHandler(fh) def setCommandString(self,command,function): self.logger.info('Adding new command %s for file recovery ',command) self.stringTofunction[command] = function def setView(self,vista): self.logger.info('Setting new view ') self.vista = vista self.setCommandString('setTag',lambda args : self.setTagLast(*args) ) self.setCommandString('SETID',lambda args : self.placeDrawable(self.drawableInMemory,args[0]) ) self.setCommandString('setViewWidthHeight',lambda args : self.vista.vistaSetWidthHeight(*args) ) self.setCommandString('placeView',lambda args : self.vista.placeView(*args) ) self.setCommandString('setViewScaleXY',lambda args : self.vista.setFactorXY(*args) ) self.setCommandString('createRectangle',lambda args : self.setLastDrawableInMemory(self.createRectangle(*args,createId=False)) ) self.setCommandString('createLine',lambda args : self.setLastDrawableInMemory(self.createLine(*args,createId=False)) ) self.setCommandString('createGroup',lambda args : self.setLastDrawableInMemory(self.createGroup(*args,createId=False)) ) self.setCommandString('createText', lambda args :self.setLastDrawableInMemory(self.createText(*args,createId=False)) ) self.setCommandString('createPointDraw', lambda args : self.setLastDrawableInMemory(self.createPointDraw(*args,createId=False)) ) def isVisible(self,drawable,intervalosView): viewIntervalX = intervalosView[0] 
viewIntervalY = intervalosView[1] intervaloQueryX= tuple([point[0] for point in drawable.calcHelperBox()]) intervaloQueryY= tuple([point[1] for point in drawable.calcHelperBox()]) return self.envision(intervaloQueryX,viewIntervalX) and self.envision(intervaloQueryY,viewIntervalY) def envision(self,queryInter,visInterval): #tres casos dentro de vision 0---1---1----0 o el caso 1-----0-------0-----1 o el caso 1------0------1 #sean los 1 el cuadro de vision objetoContieneVista = lambda queryInter,visInterval : min(queryInter) <= min(visInterval) and max(visInterval) <= max(queryInter) vistaContieneObjeto = lambda queryInter,visInterval : (min(visInterval) <= queryInter[0] <= max(visInterval)) or (min(visInterval) <= queryInter[1] <= max(visInterval)) return objetoContieneVista(queryInter,visInterval) or vistaContieneObjeto(queryInter,visInterval) def winfo_height(self): return self.vista.heigth def winfo_width(self): return self.vista.width def setLastDrawableInMemory(self,drawable): self.drawableInMemory=drawable #consigue todos los elementos en cuadrado def getSquare(self,p0,pf,tags=None): temp = [] #consigue lista con intervalos en X dentro del cuadrado (o que pasen por este) #Debe ser siempre begin < end listaIntervalos = self.intervalTreeX.search(min(p0[0],pf[0]),max(p0[0],pf[0])) #esto te entrega lista tuplas ((x2,x2),idDrawable) for tupla in listaIntervalos: drawable= self.idToDrawable[tupla[2]] #Ahora descarta los que no sean consistentes respecto al intervalo Y intervaloY = tuple([point[1] for point in drawable.calcHelperBox()]) if self.envision(intervaloY,(p0[1],pf[1])): temp.append(drawable) # print 'Elem without Filter ',str(temp) if not tags is None: return [elem for elem in temp if not self.getTagdrawable(elem) in tags] return temp """ ---------------Funciones de creacion ------------------------------ """ def createLine(self,p0,pf,createId=True): self.logger.info('Creating line in %s %s',p0,pf) line = Line(self,self.vista,p0,pf) if createId: 
self.placeDrawable(line) return line def createRectangle(self,p0,pf,createId=True): self.logger.info('Creating rectangle in %s %s',p0,pf) rect = Rectangle(self,self.vista,p0,pf) if createId: self.placeDrawable(rect) return rect def createGroup(self,listaId=None,createId=True): self.logger.info('Creating Group from list %s',listaId) group = Group(self,self.vista) if not listaId is None: for id in listaId: group.add(self.idToDrawable[id]) if createId: self.placeDrawable(group) return group def createText(self,p0,texto,createId=True): self.logger.info('Creating Text %s in %s',texto,p0) texto = TextDrawable(self,self.vista,p0,texto) if createId: self.placeDrawable(texto) return texto def createPointDraw(self,idGroup=None,createId=True): self.logger.info('Creating poinDraw from group %s',idGroup) pd = pointDraw(self,self.vista) if not idGroup is None: grupo = self.idToDrawable[idGroup] pd.addFromGroup(grupo) if createId: self.placeDrawable(pd) return pd def placeDrawable(self,drawable,id=None): self.logger.info('Placing drawable %s',str(drawable)) if id is None: drawable.uniqueId = self.__getNewId() else: drawable.uniqueId = id drawable.draw() #ASEGURATE QUE LAS HELPERBOX ESTE BIEN HECHA helperBoxCords = drawable.calcHelperBox() # print 'helperbox ',helperBoxCords # print "helper yo interval ",helperBoxCords self.intervalTreeX.addi(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) self.idToInterval[drawable.uniqueId] = Interval(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) assert(self.idToInterval[drawable.uniqueId] == drawable.calcInterval()) self.idToDrawable[drawable.uniqueId] = drawable def updatePosition(self,drawable): if self.idToDrawable.has_key(drawable.uniqueId): self.logger.info('Updating %s drawable %s ',drawable.uniqueId,str(drawable)) try: self.intervalTreeX.remove(self.idToInterval[drawable.uniqueId]) except Exception,e: print 'Error en borrar intervalo' self.logger.error('Cant remove interval %s exception 
%s',self.idToInterval[drawable.uniqueId],str(e)) self.idToInterval.pop(drawable.uniqueId) helperBoxCords = drawable.calcHelperBox() self.intervalTreeX.addi(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) self.idToInterval[drawable.uniqueId] = Interval(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) assert(self.idToInterval[drawable.uniqueId] == drawable.calcInterval()) self.logger.debug('New drawable interval %s %s %s ',helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) else:
wine_sold, g_person_node_count+g_wine_node_count, g_wine_node_count, g_person_node_count) fg.node[person_node_with_fewest_edges]["c"] += 1 wine_sold += 1 if fg.node[person_node_with_fewest_edges]["c"] == MAX_WINE: fg.remove_node(person_node_with_fewest_edges) g_person_node_count -= 1 person_id = long(person_node_with_fewest_edges.replace("p","")) has_higher = list(pt[person_id+1]) has_lower = list(pt[person_id-1]) if has_higher and has_lower: #this person is being insert right next to it's siblings, merge them into one begin = has_lower[0].begin end = has_higher[0].end pt.remove(has_lower[0]) pt.remove(has_higher[0]) pt.add(Interval(begin,end)) elif has_higher: begin = person_id end = has_higher[0].end pt.remove(has_higher[0]) pt.add(Interval(begin,end)) elif has_lower: begin = has_lower[0].begin end = person_id pt.remove(has_lower[0]) pt.add(Interval(begin,end)) else: pt.add(Interval(person_id,person_id+1)) fg.remove_node(wine_node_with_fewest_edges)
class TipsIndex:
    """Interval-tree index answering "which transactions were tips at time t?".

    Each indexed transaction owns one interval ``[begin, end)``:

    * ``begin`` is the transaction's own timestamp;
    * ``end`` is the smallest timestamp among its indexed children, i.e. the
      moment it was first verified; it is infinity while the transaction is
      still a tip.

    TODO Use an interval tree stored in disk, possibly using a B-tree.
    """

    # Interval tree holding one (begin, end, tx_hash) interval per indexed
    # transaction; querying it at a timestamp yields the tips at that time.
    tree: IntervalTree

    # Maps a transaction hash to the interval currently stored for it in the
    # tree, because the tree itself can only be addressed by interval.
    tx_last_interval: Dict[bytes, Interval]

    def __init__(self) -> None:
        self.log = logger.new()
        self.tree = IntervalTree()
        self.tx_last_interval = {}  # Dict[bytes(hash), Interval]

    def _earliest_indexed_child(self, tx: BaseTransaction, children) -> float:
        """Return the smallest timestamp among the indexed children of `tx`.

        Children that are not in the index are ignored; infinity is returned
        when none of them is indexed.
        """
        earliest = inf
        for child_hash in children:
            if child_hash not in self.tx_last_interval:
                continue
            child = tx.storage.get_transaction(child_hash)
            if child.timestamp < earliest:
                earliest = child.timestamp
        return earliest

    def _replace_interval(self, tx_hash: bytes, old: Interval, new_end: float) -> None:
        """Swap the stored interval of `tx_hash` for one ending at `new_end`."""
        self.tree.remove(old)
        replacement = Interval(old.begin, new_end, old.data)
        self.tree.add(replacement)
        self.tx_last_interval[tx_hash] = replacement

    def add_tx(self, tx: BaseTransaction) -> bool:
        """ Add a new transaction to the index

        :param tx: Transaction to be added
        :return: False when the transaction was already indexed, True otherwise
        """
        assert tx.hash is not None
        assert tx.storage is not None

        if tx.hash in self.tx_last_interval:
            return False

        # An indexed parent stops being a tip no later than this tx's timestamp.
        for parent_hash in tx.parents:
            parent_interval = self.tx_last_interval.get(parent_hash, None)
            if parent_interval and tx.timestamp < parent_interval.end:
                self._replace_interval(parent_hash, parent_interval, tx.timestamp)

        # Children may have been indexed before their parent; if so, this tx
        # already stopped being a tip at the earliest of them.
        meta = tx.get_metadata()
        end = self._earliest_indexed_child(tx, meta.children)

        interval = Interval(tx.timestamp, end, tx.hash)
        self.tree.add(interval)
        self.tx_last_interval[tx.hash] = interval
        return True

    def del_tx(self, tx: BaseTransaction, *, relax_assert: bool = False) -> None:
        """ Remove a transaction from the index.

        Nothing happens when the transaction is not indexed.  Unless
        `relax_assert` is set, the transaction is asserted to still be a tip.
        """
        assert tx.hash is not None
        assert tx.storage is not None

        interval = self.tx_last_interval.pop(tx.hash, None)
        if interval is None:
            return

        if not relax_assert:
            assert interval.end == inf

        self.tree.remove(interval)

        # Update its parents as tips if needed.
        # FIXME Although it works, it does not seem to be a good solution.
        for parent_hash in tx.parents:
            parent = tx.storage.get_transaction(parent_hash)
            # Only parents of the same kind (block vs. non-block) are refreshed.
            if parent.is_block == tx.is_block:
                self.update_tx(parent, relax_assert=relax_assert)

    def update_tx(self, tx: BaseTransaction, *, relax_assert: bool = False) -> None:
        """ Update a tx according to its children.

        Voided transactions are skipped (and, unless `relax_assert` is set,
        asserted not to be indexed).  Raises KeyError when a non-voided
        transaction is not in the index.
        """
        assert tx.storage is not None
        assert tx.hash is not None

        meta = tx.get_metadata()
        if meta.voided_by:
            if not relax_assert:
                assert tx.hash not in self.tx_last_interval
            return

        current = self.tx_last_interval[tx.hash]
        end = self._earliest_indexed_child(tx, meta.children)

        if end != current.end:
            self._replace_interval(tx.hash, current, end)

    def __getitem__(self, index: float) -> Set[Interval]:
        return self.tree[index]
def test_all():
    """Exercise IntervalTree end to end.

    Covers, in order: point/range queries, membership and ``overlaps``,
    insertion (including duplicates), same-range intervals with different
    data, copying and casting, deletion down to an empty tree, and finally
    pretty-prints a tree before and after ``split_overlaps()``.
    """
    from intervaltree import Interval, IntervalTree
    from pprint import pprint
    from operator import attrgetter

    get_data = attrgetter('data')

    def makeinterval(lst):
        # Interval whose data is the "begin-end" label of the pair.
        return Interval(lst[0], lst[1], "{}-{}".format(*lst))

    def data(s):
        # Collapse a collection of intervals to the set of their labels.
        return {get_data(iv) for iv in s}

    def expect_remove_fails(tree, iv):
        # remove() of an interval that is not stored must raise ValueError.
        try:
            tree.remove(iv)
        except ValueError:
            return
        raise AssertionError("Expected ValueError")

    pairs = [
        [1, 2],
        [4, 7],
        [5, 9],
        [6, 10],
        [8, 10],
        [8, 15],
        [10, 12],
        [12, 14],
        [14, 15],
    ]
    ivs = [makeinterval(pair) for pair in pairs]
    t = IntervalTree(ivs)
    t.verify()

    # Query tests
    print('Query tests...')
    assert data(t[4]) == {'4-7'}
    assert data(t[4:5]) == {'4-7'}
    assert data(t[4:6]) == {'4-7', '5-9'}
    assert data(t[9]) == {'6-10', '8-10', '8-15'}
    assert data(t[15]) == set()
    assert data(t.search(5)) == {'4-7', '5-9'}
    assert data(t.search(6, 11, strict=True)) == {'6-10', '8-10'}
    print(' passed')

    # Membership tests
    print('Membership tests...')
    assert ivs[1] in t
    assert Interval(1, 3, '1-3') not in t
    overlap_cases = [
        ((4,), True),
        ((9,), True),
        ((15,), False),
        ((0, 4), True),
        ((1, 2), True),
        ((1, 3), True),
        ((8, 15), True),
        ((15, 16), False),
        ((-1, 0), False),
        ((2, 4), False),
    ]
    for query, expected in overlap_cases:
        assert bool(t.overlaps(*query)) == expected
    print(' passed')

    # Insertion tests
    print('Insertion tests...')
    t.add(makeinterval([1, 2]))  # adding duplicate should do nothing
    assert data(t[1]) == {'1-2'}

    t[1:2] = '1-2'  # adding duplicate should do nothing
    assert data(t[1]) == {'1-2'}

    t.add(makeinterval([2, 4]))
    assert data(t[2]) == {'2-4'}
    t.verify()

    t[13:15] = '13-15'
    assert data(t[14]) == {'8-15', '13-15', '14-15'}
    t.verify()
    print(' passed')

    # Duplication tests
    print('Interval duplication tests...')
    t.add(Interval(14, 15, '14-15####'))
    assert data(t[14]) == {'8-15', '13-15', '14-15', '14-15####'}
    t.verify()
    print(' passed')

    # Copying and casting
    print('Tree copying and casting...')
    tcopy = IntervalTree(t)
    tcopy.verify()
    assert t == tcopy

    tlist = list(t)
    for iv in tlist:
        assert iv in t
    for iv in t:
        assert iv in tlist

    tset = set(t)
    assert tset == t.items()
    print(' passed')

    # Deletion tests
    print('Deletion tests...')
    expect_remove_fails(t, Interval(1, 3, "Doesn't exist"))
    expect_remove_fails(t, Interval(500, 1000, "Doesn't exist"))

    # The returned structure dump is not inspected; the call is kept as is.
    t.print_structure(True)
    # discard() of a missing interval must be silent and leave the tree alone.
    t.discard(Interval(1, 3, "Doesn't exist"))
    t.discard(Interval(500, 1000, "Doesn't exist"))

    assert data(t[14]) == {'8-15', '13-15', '14-15', '14-15####'}
    t.remove(Interval(14, 15, '14-15####'))
    assert data(t[14]) == {'8-15', '13-15', '14-15'}
    t.verify()

    assert data(t[2]) == {'2-4'}
    t.discard(makeinterval([2, 4]))
    assert data(t[2]) == set()
    t.verify()

    assert t[14]
    t.remove_overlap(14)
    t.verify()
    assert not t[14]

    # Emptying the tree
    #t.print_structure()
    for iv in sorted(t):
        #print('### Removing '+str(iv)+'... ###')
        t.remove(iv)
        #t.print_structure()
        t.verify()
        #print('')
    assert len(t) == 0
    assert t.is_empty()
    assert not t

    t = IntervalTree(ivs)
    #t.print_structure()
    t.remove_overlap(1)
    #t.print_structure()
    t.verify()

    t.remove_overlap(8)
    #t.print_structure()
    print(' passed')

    t = IntervalTree(ivs)
    pprint(t)
    t.split_overlaps()
    pprint(t)
    #import cPickle as pickle
    #p = pickle.dumps(t)
    #print(p)
class StorageResource(Resource):
    """Shared resource with a byte capacity that jobs reserve over time ranges.

    Every reservation is kept twice: in an interval tree (``begin``/``end``
    are the reservation's time bounds, ``data`` its size in bytes) and in a
    dict keyed by job id, one interval per job.
    """

    def __init__(self,
                 scheduler: Scheduler,
                 name: str,
                 id: int,
                 resources_list: Resources = None,
                 capacity_bytes: int = 0):
        super().__init__(scheduler, name, id, resources_list,
                         resource_sharing=True)
        self.capacity = capacity_bytes
        # job_id -> Interval(start, end, num_bytes)
        self._job_allocations: Dict[JobId, Interval] = {}
        self._interval_tree = IntervalTree()

    def _assert_bookkeeping_in_sync(self):
        """Check that the per-job dict and the interval tree agree."""
        no_allocations = not self._job_allocations
        assert bool(no_allocations) == bool(self._interval_tree.is_empty())
        assert len(self._job_allocations) == len(
            self._interval_tree.all_intervals)
        if __debug__:
            self._interval_tree.verify()

    def currently_allocated_space(self) -> int:
        """Return the bytes reserved at the scheduler's current time."""
        allocated_space = 0
        for active in self._interval_tree[self._scheduler.time]:
            allocated_space += active.data
        assert allocated_space <= self.capacity
        return allocated_space

    def available_space(self, start: float, end: float) -> int:
        """
        Return the capacity left over at the busiest moment of the
        reservations that the tree reports for the range ``start:end``.
        """
        # One (time, delta) event per reservation boundary: +bytes where it
        # begins, -bytes where it ends.
        events = []
        for reservation in self._interval_tree[start:end]:
            events.append((reservation.begin, reservation.data))
            events.append((reservation.end, -reservation.data))
        # Tuple ordering puts releases before acquisitions at equal times.
        events.sort()

        # Peak of the running sum over the sorted events.
        peak = 0
        running = 0
        for _, delta in events:
            running += delta
            if running > peak:
                peak = running

        assert peak <= self.capacity
        return self.capacity - peak

    def allocate(self, start: float, end: float, num_bytes: int, job: Job):
        """Reserve `num_bytes` for `job` over the range ``start:end``.

        Asserts that the range does not start in the past, that the bytes
        fit into the space available over the range, and that the job holds
        no reservation yet.
        """
        assert self._scheduler.time <= start <= end
        assert 0 < num_bytes <= self.available_space(start, end)
        # There should be only one interval per job.
        assert job.id not in self._job_allocations

        reservation = Interval(start, end, num_bytes)
        self._job_allocations[job.id] = reservation
        self._interval_tree.add(reservation)

        self._assert_bookkeeping_in_sync()

    def free(self, job: Job):
        """Drop the reservation held by `job` (KeyError if it has none)."""
        reservation = self._job_allocations[job.id]
        self._interval_tree.remove(reservation)
        del self._job_allocations[job.id]

        self._assert_bookkeeping_in_sync()

    def find_first_time_to_fit_job(self,
                                   job,
                                   time=None,
                                   future_reservation=False):
        raise NotImplementedError

    def get_allocation_end_times(self):
        """Return the set of end times of all current reservations."""
        return {
            reservation.end
            for reservation in self._job_allocations.values()
        }
def subsample_region_uniformly(region, args):
    """Subsample the reads of a region towards uniform coverage.

    Reads fetched from ``args.bam`` for the region (minus those rejected by
    ``filter_read``) are trimmed to the region and stored in an interval
    tree.  The region is then swept repeatedly from left to right: at each
    cursor position one read is taken out of the tree via
    ``_nearest_overlapping_point`` and added to the selection, and the cursor
    jumps to that read's end (or advances by ``args.stride`` when no read is
    returned).  After each sweep the median coverage is compared with the
    next target depth; when a target is reached a BAM and a coverage file
    are written for it.

    Args:
        region: object with ``ref_name``, ``start`` and ``end`` attributes.
        args: options object; the attributes read here are ``bam``,
            ``depth`` (iterable of target depths), ``stride``,
            ``output_prefix``, ``profile`` and ``patience``.

    Returns:
        True when every target depth was reached; False when the sweep was
        abandoned because an iteration added no reads, or because the median
        depth stayed unchanged for ``args.patience`` consecutive iterations.
    """
    logger = logging.getLogger(region.ref_name)
    logger.info("Building interval tree.")
    tree = IntervalTree()
    with pysam.AlignmentFile(args.bam) as bam:
        ref_lengths = dict(zip(bam.references, bam.lengths))
        for r in bam.fetch(region.ref_name, region.start, region.end):
            if filter_read(r, bam, args, logger):
                continue
            # trim reads to region
            tree.add(
                Interval(
                    max(r.reference_start, region.start),
                    min(r.reference_end, region.end),
                    r.query_name))

    logger.info('Starting pileup.')
    coverage = np.zeros(region.end - region.start, dtype=np.uint16)
    reads = set()
    n_reads = 0
    iteration = 0
    it_no_change = 0
    last_depth = 0
    targets = iter(sorted(args.depth))
    target = next(targets)
    found_enough_depth = True
    ref_length = ref_lengths[region.ref_name]
    while True:
        # One left-to-right sweep over the reference.
        cursor = 0
        while cursor < ref_length:
            read = _nearest_overlapping_point(tree, cursor)
            if read is None:
                cursor += args.stride
            else:
                reads.add(read.data)
                cursor = read.end
                coverage[read.begin - region.start:read.end - region.start] += 1
                # Each read can be selected only once.
                tree.remove(read)

        iteration += 1
        median_depth = np.median(coverage)
        stdv_depth = np.std(coverage)
        logger.debug(
            u'Iteration {}. reads: {}, depth: {:.0f}X (\u00B1{:.1f}).'.format(
                iteration, len(reads), median_depth, stdv_depth))

        # output when we hit a target
        if median_depth >= target:
            logger.info("Hit target depth {}.".format(target))
            prefix = '{}_{}X'.format(args.output_prefix, target)
            _write_bam(args.bam, prefix, region, reads)
            _write_coverage(prefix, region, coverage, args.profile)
            try:
                target = next(targets)
            except StopIteration:
                # All requested depths have been written.
                break

        # exit if nothing happened this iteration
        if n_reads == len(reads):
            logger.warning("No reads added, finishing pileup.")
            found_enough_depth = False
            break
        n_reads = len(reads)

        # or if no change in depth
        if median_depth == last_depth:
            it_no_change += 1
            if it_no_change == args.patience:
                logger.warning(
                    "Coverage not increased for {} iterations, finishing pileup."
                    .format(args.patience))
                found_enough_depth = False
                break
        else:
            # Depth moved again, so restart the patience count.  This used to
            # be the comparison `it_no_change == 0`, which never reset it.
            it_no_change = 0
        last_depth = median_depth

    return found_enough_depth