Example #1
0
    def mergeSegments(self,segs1,segs2,ignoreInsideEnvelope=True):
        """ Merge two segmentations of the same file into one list of [start, end] pairs.

        Every segment of segs1 is put into an interval tree. Each segment of
        segs2 is then folded in as follows:

        * it overlaps nothing in the tree: it is added as-is;
        * it envelops at least one stored segment and ignoreInsideEnvelope is
          True: the enveloped segments are removed, the new segment is widened
          to cover every stored segment that still overlaps it (those are
          removed too), and the widened segment is added;
        * it envelops at least one stored segment and ignoreInsideEnvelope is
          False: the tree is left untouched and the new segment is dropped;
        * it only partially overlaps stored segments: each such stored segment
          is stretched so that it reaches the new segment's far end.

        Segments are indexable pairs (start, end). The input lists are not
        modified. The order of the returned list is the tree's iteration order.

        NOTE(review): uses IntervalTree.search(), which presumably requires an
        intervaltree release that still ships that method - confirm the pinned version.
        """

        from intervaltree import IntervalTree
        t = IntervalTree()

        # Put the first set into the tree
        for s in segs1:
            t[s[0]:s[1]] = s

        # Decide whether or not to put each segment in the second set in
        for s in segs2:
            # Work on a private copy: widening below must not alter the caller's data
            s = list(s)
            overlaps = t.search(s[0],s[1])
            # If there are no overlaps, add it
            if len(overlaps)==0:
                t[s[0]:s[1]] = s
                continue

            # Search for any enveloped, if there are remove and add the new one
            envelops = t.search(s[0],s[1],strict=True)
            if len(envelops) > 0:
                if ignoreInsideEnvelope:
                    # Remove any inside the envelope of the test point
                    t.remove_envelop(s[0],s[1])
                    overlaps = t.search(s[0], s[1])
                    # Open out the region, delete the other
                    for o in overlaps:
                        widened = False
                        if o.begin < s[0]:
                            s[0] = o.begin
                            widened = True
                        if o.end > s[1]:
                            s[1] = o.end
                            widened = True
                        # Remove exactly once: an overlap sticking out on both
                        # sides used to be removed twice, raising ValueError
                        if widened:
                            t.remove(o)
                    t[s[0]:s[1]] = s
            else:
                # Check for those that intersect the ends, widen them out a bit.
                # Nothing is enveloped here, so at most one of the two tests
                # below can hold for any given overlap.
                for o in overlaps:
                    if o.begin > s[0]:
                        t[s[0]:o[1]] = (s[0],o[1])
                        t.remove(o)
                    if o.end < s[1]:
                        t[o[0]:s[1]] = (o[0],s[1])
                        t.remove(o)

        return [[a[0],a[1]] for a in t]
Example #2
0
def test_build_tree():
    """Replay the allocation trace in `items` against a tree of free ranges.

    The tree starts as a single interval [0, MAX). An allocating record
    (b, e, True) must fall inside exactly one stored interval, which is
    replaced by whatever remains on either side of [b, e). A releasing record
    (b, e, False) must not overlap anything stored; it is coalesced with the
    stored interval touching it on each side (if any) and inserted.
    The tree is verified at the end and returned.
    """
    pbar = ProgressBar(len(items))

    tree = IntervalTree()
    tree[0:MAX] = None
    for b, e, alloc in items:
        if not alloc:
            # Release: the range must currently be absent from the tree
            assert not tree[b:e]

            # Absorb a stored neighbour that covers the point just before b
            before = tree[b-1:b]
            assert len(before) in (0, 1)
            if before:
                left = before.pop()
                b = left.begin
                tree.remove(left)

            # Absorb a stored neighbour that covers the point at e
            after = tree[e:e+1]
            assert len(after) in (0, 1)
            if after:
                right = after.pop()
                e = right.end
                tree.remove(right)

            tree[b:e] = None
        else:
            # Allocate: carve [b, e) out of the single interval containing it
            hits = tree[b:e]
            assert len(hits)==1
            holder = hits.pop()
            assert holder.begin<=b and e<=holder.end
            tree.remove(holder)
            if holder.begin<b:
                tree[holder.begin:b] = None
            if e<holder.end:
                tree[e:holder.end] = None
        pbar()
    tree.verify()
    return tree
Example #3
0
class virtual:
    def __init__(self):
        """Create an empty virtual space with no view attached and set up file logging."""
        # maps drawable ids to their drawable
        self.idToDrawable = {}

        # maps drawable ids to the Interval stored for them in intervalTreeX
        self.idToInterval= {}
        self.tags = {}

        # holds (X interval, drawable id) entries describing the helper boxes
        # of the elements placed in virtual space
        self.intervalTreeX = IntervalTree()

        self.vista = None
        self.currentLocalId = 0


        # command name -> callable taking the command's argument list
        self.stringTofunction = {}
        self.drawableInMemory=None

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        # getLogger returns the same logger object for every instance, so only
        # attach a file handler when none is present yet; otherwise each new
        # instance would add another handler and every record would be written
        # once per instance created so far.
        if not any(isinstance(h, logging.FileHandler) for h in self.logger.handlers):
            fh = logging.FileHandler('virtualScreen.log')
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            fh.setFormatter(formatter)
            self.logger.addHandler(fh)


    def setCommandString(self,command,function):
        """Register `function` as the handler for the command name `command`.

        An existing handler stored under the same name is replaced.
        """
        self.logger.info('Adding new command %s for file recovery ',command)
        handlers = self.stringTofunction
        handlers[command] = function


    def setView(self,vista):
        """Attach `vista` as the current view and register the command handlers.

        Each handler takes one argument, the sequence of arguments for that
        command. View commands are forwarded to whatever view is attached at
        the time the command runs. Creation commands build the drawable
        without assigning it an id and remember it as the drawable in memory;
        'SETID' then places that remembered drawable under the given id.
        """
        self.logger.info('Setting new view ')

        self.vista = vista
        register = self.setCommandString

        def buildAndRemember(factoryName):
            # Look the factory up when the command runs, not when it is registered
            def handler(args):
                built = getattr(self, factoryName)(*args, createId=False)
                return self.setLastDrawableInMemory(built)
            return handler

        register('setTag', lambda args : self.setTagLast(*args))
        register('SETID', lambda args : self.placeDrawable(self.drawableInMemory, args[0]))
        register('setViewWidthHeight', lambda args : self.vista.vistaSetWidthHeight(*args))
        register('placeView', lambda args : self.vista.placeView(*args))
        register('setViewScaleXY', lambda args : self.vista.setFactorXY(*args))

        for factoryName in ('createRectangle', 'createLine', 'createGroup',
                            'createText', 'createPointDraw'):
            register(factoryName, buildAndRemember(factoryName))

    def isVisible(self,drawable,intervalosView):
        """Tell whether the drawable's helper box meets the view on both axes.

        `intervalosView` is indexed as (X interval, Y interval). The drawable's
        X and Y extents are taken from the points returned by calcHelperBox().
        """
        viewX, viewY = intervalosView[0], intervalosView[1]

        extentX = tuple(corner[0] for corner in drawable.calcHelperBox())
        extentY = tuple(corner[1] for corner in drawable.calcHelperBox())

        seenOnX = self.envision(extentX, viewX)
        if not seenOnX:
            return seenOnX
        return self.envision(extentY, viewY)

    def envision(self,queryInter,visInterval):
        """Tell whether the interval `queryInter` is at least partly inside `visInterval`.

        With 1 marking the view's bounds and 0 the object's bounds, the
        accepted layouts are 0---1---1---0 (object spans the whole view) and
        any layout where one of the object's two endpoints lies within the
        view, such as 1---0---0---1 or 1---0---1.
        """
        viewLow = min(visInterval)
        viewHigh = max(visInterval)

        objectSpansView = min(queryInter) <= viewLow and viewHigh <= max(queryInter)
        return (objectSpansView
                or (viewLow <= queryInter[0] <= viewHigh)
                or (viewLow <= queryInter[1] <= viewHigh))


    def winfo_height(self):
        """Return the height of the attached view, read from its `heigth` attribute."""
        view = self.vista
        return view.heigth
    def winfo_width(self):
        """Return the width of the attached view."""
        view = self.vista
        return view.width

    def setLastDrawableInMemory(self,drawable):
        """Remember `drawable` as the most recently built drawable."""
        setattr(self, 'drawableInMemory', drawable)

    #consigue todos los elementos en cuadrado
    def getSquare(self,p0,pf,tags=None):
        """Return the drawables whose helper box meets the square with corners p0 and pf.

        Candidates come from the X interval tree (queried with the smaller X
        first, as the tree requires begin < end) and are kept only when their
        Y extent passes envision() against (p0[1], pf[1]).
        When `tags` is given, drawables whose tag is in `tags` are left out.
        """
        lowX = min(p0[0],pf[0])
        highX = max(p0[0],pf[0])

        inSquare = []
        # every hit is indexable as (begin, end, drawable id)
        for hit in self.intervalTreeX.search(lowX,highX):
            candidate = self.idToDrawable[hit[2]]
            # drop the candidates that do not agree on the Y axis
            extentY = tuple(corner[1] for corner in candidate.calcHelperBox())
            if self.envision(extentY,(p0[1],pf[1])):
                inSquare.append(candidate)

        if tags is None:
            return inSquare
        return [candidate for candidate in inSquare if self.getTagdrawable(candidate) not in tags]



    """
    ---------------Funciones de creacion ------------------------------
    """
    def createLine(self,p0,pf,createId=True):
        """Build a Line from p0 to pf and return it.

        Unless createId is false, the line is also placed (given an id and
        indexed) before being returned.
        """
        self.logger.info('Creating line in %s %s',p0,pf)
        newLine = Line(self,self.vista,p0,pf)
        if not createId:
            return newLine
        self.placeDrawable(newLine)
        return newLine

    def createRectangle(self,p0,pf,createId=True):
        """Build a Rectangle with corners p0 and pf and return it.

        Unless createId is false, the rectangle is also placed (given an id
        and indexed) before being returned.
        """
        self.logger.info('Creating rectangle in %s %s',p0,pf)
        newRect = Rectangle(self,self.vista,p0,pf)
        if not createId:
            return newRect
        self.placeDrawable(newRect)
        return newRect

    def createGroup(self,listaId=None,createId=True):
        """Build a Group holding the drawables whose ids are in `listaId` and return it.

        With listaId left as None the group starts empty. Unless createId is
        false, the group is also placed (given an id and indexed).
        """
        self.logger.info('Creating Group from list %s',listaId)
        newGroup = Group(self,self.vista)
        memberIds = listaId if listaId is not None else ()
        for memberId in memberIds:
            newGroup.add(self.idToDrawable[memberId])

        if not createId:
            return newGroup
        self.placeDrawable(newGroup)
        return newGroup

    def createText(self,p0,texto,createId=True):
        """Build a TextDrawable showing `texto` at p0 and return it.

        Unless createId is false, the text drawable is also placed (given an
        id and indexed) before being returned.
        """
        self.logger.info('Creating Text %s in %s',texto,p0)
        textDrawable = TextDrawable(self,self.vista,p0,texto)
        if not createId:
            return textDrawable
        self.placeDrawable(textDrawable)
        return textDrawable

    def createPointDraw(self,idGroup=None,createId=True):
        """Build a pointDraw, optionally filled from the group with id `idGroup`, and return it.

        Unless createId is false, the pointDraw is also placed (given an id
        and indexed) before being returned.
        """
        self.logger.info('Creating poinDraw from group %s',idGroup)
        newPointDraw = pointDraw(self,self.vista)
        if idGroup is not None:
            newPointDraw.addFromGroup(self.idToDrawable[idGroup])
        if not createId:
            return newPointDraw
        self.placeDrawable(newPointDraw)
        return newPointDraw

    def placeDrawable(self,drawable,id=None):
        """Give `drawable` an id, draw it and index it by its X extent.

        The id is `id` when supplied, otherwise a freshly generated one. The
        X extent is taken from the first two points of calcHelperBox() and is
        recorded both in the X interval tree and in idToInterval; it is then
        asserted to equal drawable.calcInterval().
        """
        self.logger.info('Placing drawable %s',str(drawable))
        drawable.uniqueId = self.__getNewId() if id is None else id
        drawable.draw()

        # MAKE SURE THE HELPER BOX IS BUILT CORRECTLY
        box = drawable.calcHelperBox()
        beginX = box[0][0]
        endX = box[1][0]
        drawableId = drawable.uniqueId

        self.intervalTreeX.addi(beginX,endX,drawableId)
        self.idToInterval[drawableId] = Interval(beginX,endX,drawableId)

        assert(self.idToInterval[drawableId] == drawable.calcInterval())
        self.idToDrawable[drawableId] = drawable


    def updatePosition(self,drawable):
        if self.idToDrawable.has_key(drawable.uniqueId):
            self.logger.info('Updating %s drawable %s ',drawable.uniqueId,str(drawable))
            try:
                self.intervalTreeX.remove(self.idToInterval[drawable.uniqueId])
            except Exception,e:
                print 'Error en borrar intervalo'
                self.logger.error('Cant remove interval %s exception %s',self.idToInterval[drawable.uniqueId],str(e))

            self.idToInterval.pop(drawable.uniqueId)

            helperBoxCords = drawable.calcHelperBox()
            self.intervalTreeX.addi(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId)
            self.idToInterval[drawable.uniqueId] = Interval(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId)
            assert(self.idToInterval[drawable.uniqueId] == drawable.calcInterval())

            self.logger.debug('New drawable interval %s %s %s ',helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId)

        else:
                                                                         wine_sold,
                                                                         g_person_node_count+g_wine_node_count,
                                                                         g_wine_node_count,
                                                                         g_person_node_count)
 fg.node[person_node_with_fewest_edges]["c"] += 1
 wine_sold += 1
 if fg.node[person_node_with_fewest_edges]["c"] == MAX_WINE:
   fg.remove_node(person_node_with_fewest_edges)
   g_person_node_count -= 1
   person_id = long(person_node_with_fewest_edges.replace("p",""))
   has_higher = list(pt[person_id+1])
   has_lower = list(pt[person_id-1])
   if has_higher and has_lower: #this person is being insert right next to it's siblings, merge them into one
     begin = has_lower[0].begin
     end = has_higher[0].end
     pt.remove(has_lower[0])
     pt.remove(has_higher[0])
     pt.add(Interval(begin,end))
   elif has_higher:
     begin = person_id
     end = has_higher[0].end
     pt.remove(has_higher[0])
     pt.add(Interval(begin,end))
   elif has_lower:
     begin = has_lower[0].begin
     end = person_id
     pt.remove(has_lower[0])
     pt.add(Interval(begin,end))
   else:
     pt.add(Interval(person_id,person_id+1))
 fg.remove_node(wine_node_with_fewest_edges)
Example #5
0
class TipsIndex:
    """ Index, backed by an interval tree, for quickly getting the tips at a given timestamp.

    Every indexed transaction owns one interval [begin, end): `begin` is the
    transaction's timestamp and `end` is the moment it was first verified by
    another transaction, i.e. the smallest timestamp among its indexed children.

    While a transaction is still a tip, `end` is infinity.

    TODO Use an interval tree stored in disk, possibly using a B-tree.
    """

    # Interval tree used to know the tips at any timestamp. Each interval is
    # (begin, end, data) with the transaction hash stored as data.
    tree: IntervalTree

    # Latest interval stored for each transaction hash. Needed because the
    # interval tree can only be accessed by interval, not by hash.
    tx_last_interval: Dict[bytes, Interval]

    def __init__(self) -> None:
        self.log = logger.new()
        self.tree = IntervalTree()
        self.tx_last_interval = {}  # Dict[bytes(hash), Interval]

    def _replace_end(self, tx_hash: bytes, current: Interval, end: float) -> None:
        """ Swap `current` for an interval with the same begin and data but ending at `end`.
        """
        self.tree.remove(current)
        replacement = Interval(current.begin, end, current.data)
        self.tree.add(replacement)
        self.tx_last_interval[tx_hash] = replacement

    def _earliest_indexed_child(self, tx: BaseTransaction, children) -> float:
        """ Smallest timestamp among the children that are in the index, or infinity if none is.
        """
        earliest = inf
        for child_hash in children:
            if child_hash not in self.tx_last_interval:
                continue
            child = tx.storage.get_transaction(child_hash)
            earliest = min(earliest, child.timestamp)
        return earliest

    def add_tx(self, tx: BaseTransaction) -> bool:
        """ Add a new transaction to the index

        :param tx: Transaction to be added
        :return: False when the transaction was already indexed, True otherwise
        """
        assert tx.hash is not None
        assert tx.storage is not None
        if tx.hash in self.tx_last_interval:
            return False

        # An indexed parent stops being a tip at this transaction's timestamp,
        # unless it had already been verified earlier than that.
        for parent_hash in tx.parents:
            parent_interval = self.tx_last_interval.get(parent_hash, None)
            if not parent_interval:
                continue
            if tx.timestamp < parent_interval.end:
                self._replace_end(parent_hash, parent_interval, tx.timestamp)

        # Children may have been added before their parent; if so, the new
        # interval ends at the smallest timestamp among them.
        meta = tx.get_metadata()
        end = self._earliest_indexed_child(tx, meta.children)

        new_interval = Interval(tx.timestamp, end, tx.hash)
        self.tree.add(new_interval)
        self.tx_last_interval[tx.hash] = new_interval
        return True

    def del_tx(self, tx: BaseTransaction, *, relax_assert: bool = False) -> None:
        """ Remove a transaction from the index.

        Nothing happens when the transaction is not indexed. Unless
        `relax_assert` is set, the transaction is asserted to still be a tip.
        """
        assert tx.hash is not None
        assert tx.storage is not None

        removed = self.tx_last_interval.pop(tx.hash, None)
        if removed is None:
            return

        if not relax_assert:
            assert removed.end == inf

        self.tree.remove(removed)

        # Update its parents as tips if needed. Only parents of the same kind
        # (block vs. non-block) as the removed transaction are refreshed.
        # FIXME Although it works, it does not seem to be a good solution.
        for parent_hash in tx.parents:
            parent = tx.storage.get_transaction(parent_hash)
            if parent.is_block == tx.is_block:
                self.update_tx(parent, relax_assert=relax_assert)

    def update_tx(self, tx: BaseTransaction, *, relax_assert: bool = False) -> None:
        """ Update a tx according to its children.

        Voided transactions are skipped (and, unless `relax_assert` is set,
        asserted to be absent from the index).
        """
        assert tx.storage is not None
        assert tx.hash is not None

        meta = tx.get_metadata()
        if meta.voided_by:
            if not relax_assert:
                assert tx.hash not in self.tx_last_interval
            return

        current = self.tx_last_interval[tx.hash]
        end = self._earliest_indexed_child(tx, meta.children)

        if end != current.end:
            self._replace_end(tx.hash, current, end)

    def __getitem__(self, index: float) -> Set[Interval]:
        return self.tree[index]
def test_all():
    """Exercise IntervalTree end to end on a small fixed set of intervals.

    Covers, in order: point/range/strict queries, membership and overlap
    checks, insertion (including duplicates), copying and casting, removal
    (remove, discard, remove_overlap), emptying the tree, and finally prints
    the tree before and after split_overlaps(). The steps share one mutable
    tree, so their order matters.
    """
    from intervaltree import Interval, IntervalTree
    from pprint import pprint
    from operator import attrgetter
    
    def makeinterval(lst):
        # Build an Interval whose data is the label "begin-end"
        return Interval(
            lst[0], 
            lst[1], 
            "{}-{}".format(*lst)
            )
    
    ivs = list(map(makeinterval, [
        [1,2],
        [4,7],
        [5,9],
        [6,10],
        [8,10],
        [8,15],
        [10,12],
        [12,14],
        [14,15],
        ]))
    t = IntervalTree(ivs)
    t.verify()
    
    def data(s): 
        # Reduce a collection of intervals to the set of their data labels
        return set(map(attrgetter('data'), s))
    
    # Query tests
    print('Query tests...')
    assert data(t[4])          == set(['4-7'])
    assert data(t[4:5])        == set(['4-7'])
    assert data(t[4:6])        == set(['4-7', '5-9'])
    assert data(t[9])          == set(['6-10', '8-10', '8-15'])
    assert data(t[15])         == set()
    assert data(t.search(5))   == set(['4-7', '5-9'])
    assert data(t.search(6, 11, strict = True)) == set(['6-10', '8-10'])
    
    print('    passed')
    
    # Membership tests
    print('Membership tests...')
    assert ivs[1] in t
    assert Interval(1,3, '1-3') not in t
    assert t.overlaps(4)
    assert t.overlaps(9)
    assert not t.overlaps(15)
    assert t.overlaps(0,4)
    assert t.overlaps(1,2)
    assert t.overlaps(1,3)
    assert t.overlaps(8,15)
    assert not t.overlaps(15, 16)
    assert not t.overlaps(-1, 0)
    assert not t.overlaps(2,4)
    print('    passed')
    
    # Insertion tests
    print('Insertion tests...')
    t.add( makeinterval([1,2]) )  # adding duplicate should do nothing
    assert data(t[1])        == set(['1-2'])
    
    t[1:2] = '1-2'                # adding duplicate should do nothing
    assert data(t[1])        == set(['1-2'])
    
    t.add(makeinterval([2,4]))
    assert data(t[2])        == set(['2-4'])
    t.verify()
    
    t[13:15] = '13-15'
    assert data(t[14])       == set(['8-15', '13-15', '14-15'])
    t.verify()
    print('    passed')
    
    # Duplication tests: same range as an existing interval but different data
    print('Interval duplication tests...')
    t.add(Interval(14,15,'14-15####'))
    assert data(t[14])        == set(['8-15', '13-15', '14-15', '14-15####'])
    t.verify()
    print('    passed')
    
    # Copying and casting
    print('Tree copying and casting...')
    tcopy = IntervalTree(t)
    tcopy.verify()
    assert t == tcopy
    
    tlist = list(t)
    for iv in tlist:
        assert iv in t
    for iv in t:
        assert iv in tlist
    
    tset = set(t)
    assert tset == t.items()
    print('    passed')
    
    # Deletion tests
    print('Deletion tests...')
    # remove() of an absent interval must raise ValueError
    try:
        t.remove(
            Interval(1,3, "Doesn't exist")
            )
    except ValueError:
        pass
    else:
        raise AssertionError("Expected ValueError")
    
    try:
        t.remove(
            Interval(500, 1000, "Doesn't exist")
            )
    except ValueError:
        pass
    else:
        raise AssertionError("Expected ValueError")
    
    # discard() of an absent interval must not raise
    # NOTE(review): `orig` is never read afterwards
    orig = t.print_structure(True)
    t.discard( Interval(1,3, "Doesn't exist") )
    t.discard( Interval(500, 1000, "Doesn't exist") )
    
    assert data(t[14])        == set(['8-15', '13-15', '14-15', '14-15####'])
    t.remove( Interval(14,15,'14-15####') )
    assert data(t[14])        == set(['8-15', '13-15', '14-15'])
    t.verify()
    
    assert data(t[2])        == set(['2-4'])
    t.discard( makeinterval([2,4]) )
    assert data(t[2])        == set()
    t.verify()
    
    assert t[14]
    t.remove_overlap(14)
    t.verify()
    assert not t[14]
    
    # Emptying the tree
    #t.print_structure()
    for iv in sorted(iter(t)):
        #print('### Removing '+str(iv)+'... ###')
        t.remove(iv)
        #t.print_structure()
        t.verify()
        #print('')
    assert len(t) == 0
    assert t.is_empty()
    assert not t
    
    # Rebuild from the original intervals and remove by overlap point
    t = IntervalTree(ivs)
    #t.print_structure()
    t.remove_overlap(1)
    #t.print_structure()
    t.verify()
    
    t.remove_overlap(8)
    #t.print_structure()    
    print('    passed')
    
    # Show the tree before and after split_overlaps(); nothing is asserted here
    t = IntervalTree(ivs)
    pprint(t)
    t.split_overlaps()
    pprint(t)
    #import cPickle as pickle
    #p = pickle.dumps(t)
    #print(p)
    
Example #7
0
class StorageResource(Resource):
    """Shared resource with a byte capacity, tracking per-job space reservations over time.

    Each job holds at most one reservation, stored as an Interval
    (start, end, num_bytes) both in a job-id lookup table and in an interval tree.
    """

    def __init__(self,
                 scheduler: Scheduler,
                 name: str,
                 id: int,
                 resources_list: Resources = None,
                 capacity_bytes: int = 0):
        super().__init__(scheduler,
                         name,
                         id,
                         resources_list,
                         resource_sharing=True)
        self.capacity = capacity_bytes
        # job_id -> Interval(start, end, num_bytes)
        self._job_allocations: Dict[JobId, Interval] = {}
        self._interval_tree = IntervalTree()

    def _check_bookkeeping(self) -> None:
        """Assert that the job table and the interval tree describe the same reservations."""
        assert bool(not self._job_allocations) == bool(
            self._interval_tree.is_empty())
        assert len(self._job_allocations) == len(
            self._interval_tree.all_intervals)
        if __debug__:
            self._interval_tree.verify()

    def currently_allocated_space(self) -> int:
        """Bytes reserved at the scheduler's current time."""
        now = self._scheduler.time
        allocated_space = 0
        for reservation in self._interval_tree[now]:
            allocated_space += reservation.data
        assert allocated_space <= self.capacity
        return allocated_space

    def available_space(self, start: float, end: float) -> int:
        """
        Available space in the storage resource in a time range (start, end).
        Should be the same as self._interval_tree.envelop(start, end).

        Computed as the capacity minus the peak of the summed sizes of the
        reservations overlapping the range.
        """
        overlapping = self._interval_tree[start:end]

        # One (time, delta) event per reservation boundary: +size where it
        # begins, -size where it ends. Sorting the tuples puts, at equal
        # times, the negative deltas before the positive ones.
        events = sorted([(iv.begin, iv.data) for iv in overlapping] +
                        [(iv.end, -iv.data) for iv in overlapping])

        # Running total of the deltas; its maximum is the peak usage
        peak = 0
        running = 0
        for _, delta in events:
            running += delta
            if running > peak:
                peak = running

        assert peak <= self.capacity
        return self.capacity - peak

    def allocate(self, start: float, end: float, num_bytes: int, job: Job):
        """Reserve `num_bytes` for `job` over [start, end]; a job may hold only one reservation."""
        assert self._scheduler.time <= start <= end
        assert 0 < num_bytes <= self.available_space(start, end)
        # There should be only one interval per job.
        assert job.id not in self._job_allocations
        reservation = Interval(start, end, num_bytes)
        self._job_allocations[job.id] = reservation
        self._interval_tree.add(reservation)
        self._check_bookkeeping()

    def free(self, job: Job):
        """Release the reservation held by `job`."""
        reservation = self._job_allocations[job.id]
        self._interval_tree.remove(reservation)
        del self._job_allocations[job.id]
        self._check_bookkeeping()

    def find_first_time_to_fit_job(self,
                                   job,
                                   time=None,
                                   future_reservation=False):
        raise NotImplementedError

    def get_allocation_end_times(self):
        """Set of end times of all current reservations."""
        return {reservation.end
                for reservation in self._job_allocations.values()}
Example #8
0
def subsample_region_uniformly(region, args):
    """Pick reads from `region` in sweeps until each requested depth is reached.

    Reads fetched from `args.bam` (minus those rejected by `filter_read`) are
    trimmed to the region and stored in an interval tree. Each iteration
    sweeps a cursor along the reference: the read returned by
    `_nearest_overlapping_point` is selected, added to the coverage array and
    removed from the tree, and the cursor jumps to that read's end; when no
    read is returned the cursor advances by `args.stride`.

    After every sweep the median coverage is compared with the next target
    from `args.depth` (taken in ascending order). On a hit, the selected reads
    and the coverage are written under the prefix
    '<args.output_prefix>_<target>X'.

    :param region: object providing `ref_name`, `start` and `end`.
    :param args: options providing `bam`, `depth`, `stride`, `patience`,
        `output_prefix` and `profile`.
    :returns: True when every target depth was reached; False when the pileup
        stopped early because a sweep added no reads, or because the median
        depth stayed unchanged for `args.patience` consecutive sweeps.
    """
    logger = logging.getLogger(region.ref_name)
    logger.info("Building interval tree.")
    tree = IntervalTree()
    with pysam.AlignmentFile(args.bam) as bam:
        ref_lengths = dict(zip(bam.references, bam.lengths))
        for r in bam.fetch(region.ref_name, region.start, region.end):
            if filter_read(r, bam, args, logger):
                continue
            # trim reads to region
            tree.add(
                Interval(max(r.reference_start, region.start),
                         min(r.reference_end, region.end), r.query_name))

    logger.info('Starting pileup.')
    coverage = np.zeros(region.end - region.start, dtype=np.uint16)
    reads = set()
    n_reads = 0
    iteration = 0
    it_no_change = 0
    last_depth = 0
    targets = iter(sorted(args.depth))
    target = next(targets)
    found_enough_depth = True
    while True:
        cursor = 0
        while cursor < ref_lengths[region.ref_name]:
            read = _nearest_overlapping_point(tree, cursor)
            if read is None:
                cursor += args.stride
            else:
                reads.add(read.data)
                cursor = read.end
                coverage[read.begin - region.start:read.end -
                         region.start] += 1
                # a read can only be selected once
                tree.remove(read)
        iteration += 1
        median_depth = np.median(coverage)
        stdv_depth = np.std(coverage)
        logger.debug(
            u'Iteration {}. reads: {}, depth: {:.0f}X (\u00B1{:.1f}).'.format(
                iteration, len(reads), median_depth, stdv_depth))
        # output when we hit a target
        if median_depth >= target:
            logger.info("Hit target depth {}.".format(target))
            prefix = '{}_{}X'.format(args.output_prefix, target)
            _write_bam(args.bam, prefix, region, reads)
            _write_coverage(prefix, region, coverage, args.profile)
            try:
                target = next(targets)
            except StopIteration:
                break
        # exit if nothing happened this iteration
        if n_reads == len(reads):
            logger.warning("No reads added, finishing pileup.")
            found_enough_depth = False
            break
        n_reads = len(reads)
        # or if no change in depth
        if median_depth == last_depth:
            it_no_change += 1
            if it_no_change == args.patience:
                # report through the region logger, like every other message here
                logger.warning(
                    "Coverage not increased for {} iterations, finishing pileup."
                    .format(args.patience))
                found_enough_depth = False
                break
        else:
            # depth moved: restart the patience count (this was a no-op
            # comparison `==` before, so the counter never reset)
            it_no_change = 0
        last_depth = median_depth
    return found_enough_depth