Example #1
class Allocator(RenamingAllocatorBase):
  __slots__ = ('_maxeva', '_eva2sz', '_state')

  def __init__(self, **kwargs):
    super().__init__()
    self._eva2sz = {}
    self._maxeva = 0
    self._state = IntervalMap(4096, 2**64, False)

  def _alloc(self, event, sz):
    # Impose a minimum size on all allocations, so that, in particular,
    # zero-size allocations are still distinct entities, as required by
    # POSIX.
    if sz < 4 : sz = 4

    res = self._maxeva
    self._maxeva += sz

    self._eva2sz[res] = sz
    self._state.mark(res, sz, True)

    return res

  def _free(self, event, eva):
    self._state.mark(eva, self._eva2sz[eva], False)
    del self._eva2sz[eva]

  def _try_realloc(self, event, oeva, nsz):
    return False
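
# A worked sketch of the bump behaviour above (not executed here): three
# successive _alloc calls with sizes 0, 10, and 3 return EVAs 0, 4, and 14,
# since the zero-size request is padded to the 4-byte minimum and each
# request is bumped past the end of the previous one.
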
class ClingyAllocatorBase(RenamingAllocatorBase, metaclass=ABCMeta):
    # Initialization ------------------------------------------------------ {{{
    __slots__ = ('_bix2state', '_bix2szbm', '_brscache', '_bucklog',
                 '_junklru', '_junkbdn', '_maxbix', '_njunkb', '_nbwb',
                 '_pagelog', '_paranoia', '_revoke_k', '_szbix2ap', '_tslam')

    # Argument definition and response ------------------------------------ {{{
    @staticmethod
    def _init_add_args(argp):
        argp.add_argument(
            '--realloc',
            action='store',
            type=str,
            default="always",
            choices=['always', 'yes', 'onlyshrink', 'never', 'no'])
        argp.add_argument('--paranoia', action='store', type=int, default=0)
        argp.add_argument('--revoke-k', action='store', type=int, default=1)
        argp.add_argument('--render-style',
                          action='store',
                          type=str,
                          default="compact",
                          choices=['compact', 'expand16'])
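
    # For example, a driver might pass something like (a hypothetical
    # invocation using only the flags defined above):
    #   cliargs=['--realloc=onlyshrink', '--paranoia=1', '--revoke-k=4']
    # which parse_args() in __init__ below turns into the attributes consumed
    # by _init_handle_args.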

    def _init_handle_args(self, args):
        self._paranoia = args.paranoia
        if self._paranoia == 0 and __debug__:
            logging.warning("Assertions still enabled, even with paranoia 0; "
                            "try python -O")
        if self._paranoia != 0 and not __debug__:
            raise ValueError("Paranoia without assertions will just be slow")

        assert args.revoke_k > 0
        self._revoke_k = args.revoke_k

        if args.realloc == "never" or args.realloc == "no":
            self._try_realloc = self._try_realloc_never
        elif args.realloc == "onlyshrink":
            self._try_realloc = self._try_realloc_onlyshrink
        else:
            self._try_realloc = self._try_realloc_yes

        if args.render_style == "expand16":
            self.render = self._render_expanded

# --------------------------------------------------------------------- }}}

    def __init__(self, **kwargs):
        super().__init__()

        self._tslam = kwargs['tslam']

        # Argument parsing ---------------------------------------------------- {{{

        argp = argparse.ArgumentParser()
        self._init_add_args(argp)
        self._init_handle_args(argp.parse_args(kwargs['cliargs']))

        # --------------------------------------------------------------------- }}}

        # Power of two, greater than page log
        self._bucklog = 16
        self._pagelog = 12

        self._maxbix = 1  # Next never-touched bucket index (AHWM)
        self._szbix2ap = {}  # BUMP allocation pointer by size and bix
        self._bix2szbm = {}  # BUMP and WAIT buckets' size and bitmaps
        self._njunkb = 0  # Number of buckets in JUNK state
        self._nbwb = 0  # Number of buckets in BUMP|WAIT states
        self._bix2state = IntervalMap(self._maxbix,
                                      2**(64 - self._bucklog) - self._maxbix,
                                      BuckSt.AHWM)
        self._brscache = None  # Biggest revokable span cache

        self._junklru = dllist()  # List of all revokable spans, LRU
        self._junkbdn = {}  # JUNK bix to node in above list

# --------------------------------------------------------------------- }}}
# Size-related utility functions -------------------------------------- {{{

    def _issmall(self, sz):
        return sz <= 2**(self._bucklog - 1)

    # Find the right size bucket for a small request.  Starting from 16, we
    # divide the gap between successive powers of two into four regions and
    # map objects into the smallest size class at least as large as their
    # size.  The size-class sequence thus begins 16 20 24 28 32 40 48 56 64
    # 80 96 112 128.  We consider only objects no larger than half a bucket
    # (i.e. at most 2**(bucklog-1) bytes) to be "small"; this is captured by
    # _issmall(), above.
    def _szfix(self, sz):
        assert self._issmall(sz)

        if sz <= 16: return 16

        # XXX
        # At 1/4 linear separation between successive powers of two, we
        # are only guaranteed 16/4 = 4 byte alignment of objects.  If we
        # really want to get down to it, we could try doing something more
        # clever here or we could enforce that we always allocate objects
        # with size max(requested_size, alignment*4).
        bl = sz.bit_length() - 1
        fl = 1 << bl

        d = sz - fl
        if d == 0: return sz

        cl = fl << 1
        assert fl <= sz < cl

        if d <= (fl >> 1):
            if d <= (fl >> 2): return fl + (fl >> 2)
            else: return fl + (fl >> 1)
        elif d <= 3 * (fl >> 2): return fl + 3 * (fl >> 2)
        return cl

    def _maxoix(self, sz):
        return int((2**self._bucklog) / self._szfix(sz))

    def _bix2va(self, bix):
        return bix << self._bucklog

    def _va2bix(self, va):
        return va >> self._bucklog

    def _sz2nbucks(self, sz):
        return int((sz + 2**self._bucklog - 1) >> self._bucklog)

    def _nbucks2sz(self, bs):
        return bs << self._bucklog
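
    # Worked examples with the default bucklog of 16 (64 KiB buckets), as a
    # sketch only: _bix2va(3) == 0x30000, _sz2nbucks(65536) == 1,
    # _sz2nbucks(65537) == 2, and _maxoix(16) == 4096.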

    # --------------------------------------------------------------------- }}}
    # Additional state assertions and diagnostics ------------------------- {{{

    def _state_diag(self):
        return (self._bix2szbm, self._szbix2ap, [x for x in self._bix2state])

    def _state_asserts(self):
        # if __debug__ : logging.debug("%r %r %r", self._bix2szbm, self._szbix2ap, [x for x in self._bix2state])

        # Ensure that our _maxbix looks like the HWM
        (mbase, msz, mv) = self._bix2state[self._maxbix]
        assert mbase + msz == 2**(64 - self._bucklog), ("maxbix not max",
                                                        self._maxbix, mbase,
                                                        msz, mv)
        assert mv == BuckSt.AHWM, ("maxbix not AHWM", self._maxbix, mbase, msz,
                                   mv)

        (njunk, ntidy) = (0, 0)
        for (qb, qsz, qv) in self._bix2state:
            if qv == BuckSt.JUNK:
                # Ensure presence on linked list
                assert self._junkbdn.get(qb,
                                         None) is not None, "JUNK not on LRU"
                # Account
                njunk += qsz
            elif qv == BuckSt.TIDY:
                # Account
                ntidy += qsz
            elif qv == BuckSt.BUMP:
                # Ensure that BUMP states are backed in dictionaries
                for bc in range(qb, qb + qsz):
                    (bsz, _) = self._bix2szbm[bc]
                    assert self._szbix2ap.get(bsz) is not None, \
                        ("BUMP miss sz", bc, bsz, self._state_diag())
                    assert self._szbix2ap[bsz][bc] is not None, \
                        ("BUMP miss ix", bc, bsz, self._state_diag())
            elif qv == BuckSt.WAIT:
                # Same for WAIT states.  Not all WAIT-state buckets are
                # necessarily indexed, tho', so we have to be somewhat careful
                bc = qb
                bce = qb + qsz
                while bc < bce:
                    assert self._bix2szbm.get(bc) is not None, \
                        ("B/W miss", bc, self._state_diag())
                    (bsz, _) = self._bix2szbm[bc]
                    bc += self._sz2nbucks(bsz)

        # Check that our running sum of JUNK pages is correct
        assert self._njunkb == njunk, "JUNK accounting botch"

        nbw = 0
        for b in self._bix2szbm:
            # All busy buckets are marked as such?
            (_, _, v) = self._bix2state[b]
            assert v in [BuckSt.BUMP, BuckSt.WAIT], ("B/W botch", b, v, \
                        self._state_diag())

            # Account
            nbw += self._sz2nbucks(self._bix2szbm[b][0])

        assert self._nbwb == nbw, \
                ("BUMP|WAIT accounting botch", nbw, self._nbwb,
                  self._bix2szbm.keys(), [x for x in self._bix2state])

        # Everything adds up, right?
        #      non-AHWM         JUNK         BUMP|WAIT      TIDY
        assert self._maxbix == self._njunkb + self._nbwb  + ntidy, \
               ("General accounting botch", self._maxbix, self._njunkb,
                 self._bix2szbm.keys(), [x for x in self._bix2state])

        # Every currently-active BUMP bucket is tagged as such, yes?
        for sz in self._szbix2ap:
            for bc in self._szbix2ap[sz]:
                (_, _, v) = self._bix2state[bc]
                assert v == BuckSt.BUMP, ("BUMP botch", bc, v,
                                          self._state_diag())

        # Ensure that JUNK list entries are so stated
        for (jb, jsz) in self._junklru:
            (qb, qsz, qv) = self._bix2state[jb]
            assert qv == BuckSt.JUNK, "LRU not JUNK"
            assert qb == jb and qsz == jsz, "LRU JUNK segment botch"

# --------------------------------------------------------------------- }}}
# Revocation logic ---------------------------------------------------- {{{

# An actual implementation would maintain a prioqueue or something;
# we can get away with a linear scan.  We interrogate the bucket state
# interval map for ease of coalescing, even though we also maintain a
# parallel JUNK LRU queue.

    def _find_largest_revokable_spans(self, n=1):
        if n == 0: return
        if n == 1 and self._brscache is not None:
            return [self._brscache]

        bests = [(0, -1, -1)]  # [(njunk, bix, sz)] in ascending order
        for (qbase, qsz,
             qv) in self._bix2state.iter_vfilter(None, self._maxbix, st_tj):

            # Smaller or busy spans don't interest us
            if qsz <= bests[0][0]: continue

            # Reject spans that are entirely TIDY already.
            js = [
                sz for (_, sz, v) in self._bix2state[qbase:qbase + qsz]
                if v == BuckSt.JUNK
            ]
            if js == []: continue

            # Sort spans by number of JUNK buckets, not JUNK|TIDY buckets
            nj = sum(js)
            if nj <= bests[0][0]: continue
            insort(bests, (nj, qbase, qsz))

            bests = bests[(-n):]

        return bests

    def _mark_tidy(self, bix, sz, nj):
        # Because we coalesce with TIDY spans while revoking, there may be
        # several JUNK spans in here.  Go remove all of them from the LRU.
        for (qbix, qsz, qv) in self._bix2state[bix:bix + sz]:
            assert qv in st_atj, "Revoking non-revokable span"
            if qv == BuckSt.JUNK: self._junklru.remove(self._junkbdn.pop(qbix))

        self._njunkb -= nj
        self._bix2state.mark(bix, sz, BuckSt.TIDY)

        self._brscache = None

    def _do_revoke(self, ss):
        if self._paranoia > PARANOIA_STATE_ON_REVOKE: self._state_asserts()

        nrev = sum([nj for (nj, _, _) in ss if nj > 0])
        ntidy = self._maxbix - self._njunkb - self._nbwb
        print("Revoking: ts=%.2f hwm=%d busy=%d junk=%d tidy=%d rev=%d rev/hwm=%2.2f%% rev/junk=%2.2f%% ss=%r" \
              % (self._tslam() / 1e9, self._maxbix, self._nbwb, self._njunkb, ntidy,
                 nrev, nrev/self._maxbix * 100, nrev/self._njunkb * 100, ss),
              file=sys.stderr)

        for (nj, bix, sz) in ss:
            self._mark_tidy(bix, sz, nj)

        self._publish(
            'revoked', "---", "",
            *((self._bix2va(bix), self._bix2va(bix + sz))
              for (_, bix, sz) in ss))

    # Conditionally revokes the top n segments if the predicate, which is
    # given the number of junk buckets in the largest span, says to.
    #
    # If given a "revoke" parameter, it must be an iterable of
    # bases of junk spans which will be guaranteed to be revoked, even if they
    # are not the largest spans known.  This may be used to force some degree
    # of reuse of small spans, as suggested by Hongyan.
    def _predicated_revoke_best(self, fn, n=None, revoke=[]):
        revoke = list(revoke)
        assert len(revoke) <= self._revoke_k

        if n is None:
            n = self._revoke_k

        nrev = None
        brss = None

        if self._brscache is not None:
            # If the best revocable span is cached, just extract the answer
            (nrev, _, _) = self._brscache
        else:
            # Otherwise, answer is not cached, so go compute it now.
            # Compute one more so we can update the cache immediately.
            brss = self._find_largest_revokable_spans(n=n + 1)
            self._brscache = brss[-1]
            nrev = self._brscache[0]

        if fn(nrev):
            # Revoking the top k spans means that the (k+1)th span is
            # certainly the most productive, in terms of the number of JUNK
            # buckets it contains.  Immediately update the cache to avoid
            # needing another sweep later.
            if brss is None:
                brss = self._find_largest_revokable_spans(n=n + 1)

            assert brss[-1][0] == nrev, \
                      ("Incorrect accounting in cache?", brss, nrev, self._brscache)

            # For each mandatory span, fish through the best spans to see if one
            # contains it.  If so, let's revoke the container rather than the
            # containee.
            rset = set()
            for mustbix in revoke:
                for (brnj, brix, brsz) in brss:
                    if brix <= mustbix < brix + brsz:
                        rset.add((brnj, brix, brsz))
                        break
                else:
                    # No container found; add the mandatory span, counting the number
                    # of junk buckets in it.
                    (qix, qsz,
                     _) = self._bix2state.get(mustbix,
                                              coalesce_with_values=st_tj)
                    rset.add(
                        (sum(sz
                             for (_, sz, v) in self._bix2state[qix:qix + qsz]
                             if v == BuckSt.JUNK), qix, qsz))

            # Now, go through the best spans until we have at most the number of
            # spans we can revoke in one go.  Since we may have picked some of the
            # best spans while considering mandatory spans above, it's not as
            # simple as just concatenating a list, but it's still not terrible.
            while len(rset) < self._revoke_k and brss != []:
                rset.add(brss[-1])
                brss = brss[:-1]

            self._do_revoke(rset)

            # Find the largest best span not used and update the cache
            while brss != []:
                if brss[-1] not in rset: break
                brss = brss[:-1]
            if brss != []: self._brscache = brss[-1]
            else: self._brscache = (0, -1, -1)

    @abstractmethod
    def _maybe_revoke(self):
        # By default, don't!
        #
        # A reasonable implementation of this function will look like a series
        # of checks about the state of the allocator (probing at the accounted
        # _njunkb and _nbwb and _maxbix) and then a call to
        # _predicated_revoke_best with an additional predicate, given the
        # number of buckets that can be reclaimed in the largest revokable
        # (JUNK|TIDY coalesced) span.  I apologize for the interface, but
        # it seemed like a good balance between detailed accounting of the best
        # revokable span at all times or always needing to walk the intervalmap.
        #
        pass
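
    # A hypothetical subclass policy (a sketch only): revoke once JUNK
    # buckets make up at least a quarter of the heap high-water mark,
    # accepting whatever spans the heuristic offers:
    #
    #   def _maybe_revoke(self):
    #       if self._maxbix > 0 and self._njunkb >= self._maxbix // 4:
    #           self._predicated_revoke_best(lambda nj: nj > 0)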

# --------------------------------------------------------------------- }}}
# Allocation ---------------------------------------------------------- {{{

# Return the bucket index to use for a small placement of size `sz` and
# made by call stack `stk`.  Available options include the existing bump
# buckets `bbks` or the TIDY/AHWM segments indicated in `tidys`.  These
# last two parameters are Python iterators, not lists, to speed up the
# most common cases.  `tidys` is an iterator of (index, length) tuples,
# each indicating possibly multiple locations.
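#
# A minimal placement-policy sketch (hypothetical subclass code, assuming
# only the interface described above): reuse the first available bump
# bucket, otherwise take the start of the first TIDY/AHWM span:
#
#   def _alloc_place_small(self, stk, sz, bbks, tidys):
#       for bbix in bbks:
#           return bbix
#       (loc, _) = next(tidys)
#       return loc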

    @abstractmethod
    def _alloc_place_small(self, stk, sz, bbks, tidys):
        raise NotImplementedError()

    # Some classes may be associating metadata with bump buckets.  This
    # callback fires whenever a bump bucket fills, to indicate that no future
    # allocations will take place from that bucket and so the metadata can be
    # released.
    def _alloc_place_small_full(self, bbix):
        pass

    # Return the initial bucket index to use for a large allocation of `sz`
    # *buckets* (not bytes).  `tidys` is, as with `_alloc_place_small`, an
    # iterator of (index, length) pairs.
    @abstractmethod
    def _alloc_place_large(self, stk, sz, tidys):
        raise NotImplementedError()
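
    # A corresponding large-placement sketch (hypothetical, with the same
    # caveats as the small-placement sketch above); the trailing AHWM span
    # offered via `tidys` is always large enough to satisfy the request:
    #
    #   def _alloc_place_large(self, stk, sz, tidys):
    #       for (loc, tsz) in tidys:
    #           if tsz >= sz:
    #               return loc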

    def _mark_allocated(self, reqbase, reqbsz, nst):
        if self._paranoia > PARANOIA_STATE_PER_OPER:
            assert nst in {BuckSt.BUMP, BuckSt.WAIT}

            (qbase, qsz, qv) = self._bix2state.get(reqbase,
                                                   coalesce_with_values=st_at)
            assert qv in st_at, ("New allocated mark in bad state", qv)
            assert qbase + qsz >= reqbase + reqbsz, "New allocated undersized?"

        if reqbase > self._maxbix:
            # Allocation request leaving a gap; mark the skipped spans as TIDY
            # rather than leaving them as AHWM.
            #
            # While this might, technically, change the largest revokable span,
            # it will not change the number of JUNK buckets in any span, and so
            # we need not necessarily invalidate brscache.
            self._bix2state.mark(self._maxbix, reqbase - self._maxbix,
                                 BuckSt.TIDY)

        # If the allocation takes place within the current best revokable span,
        # invalidate the cache and let the revocation heuristic reconstruct it.
        if self._brscache is not None:
            (_, brsix, brssz) = self._brscache
            if brsix <= reqbase < brsix + brssz:
                self._brscache = None

        self._nbwb += reqbsz
        self._maxbix = max(self._maxbix, reqbase + reqbsz)
        self._bix2state.mark(reqbase, reqbsz, nst)

    def _alloc(self, stk, tid, sz):
        if __debug__: logging.debug(">_alloc sz=%d", sz)
        if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts()

        # XXX should coalesce
        tidys = ((loc, tsz) for (loc, tsz, v) in self._bix2state \
                    if v in { BuckSt.TIDY , BuckSt.AHWM })

        if self._issmall(sz):
            # Is small allocation

            # Is bump bucket available?
            nsz = self._szfix(sz)
            bbs = self._szbix2ap.get(nsz, {})

            bbix = self._alloc_place_small(stk, sz, iter(bbs.keys()), tidys)
            if bbix not in bbs:
                self._publish('mapd', stk, tid, self._bix2va(bbix),
                              self._bix2va(bbix + 1), 0b11)
                self._mark_allocated(bbix, 1, BuckSt.BUMP)
                self._bix2szbm[bbix] = (nsz, 0)
                if nsz not in self._szbix2ap: self._szbix2ap[nsz] = {}
                bbap = 0
            else:
                bbap = bbs[bbix]

            if __debug__:
                # Some sanity-checking doesn't hurt, either.
                (bbsz, bbbm) = self._bix2szbm[bbix]
                assert bbsz == nsz, "Incorrect indexing of BUMP buckets"
                assert bbbm & (1 << bbap) == 0, \
                    "Attempting to BUMP into free object"

            bbap += 1
            if bbap == self._maxoix(nsz):
                # out of room; can't bump this any more
                del self._szbix2ap[nsz][bbix]
                self._bix2state.mark(bbix, 1, BuckSt.WAIT)

                # Inform the placement policy that this one is no-go and won't be
                # coming back, so it can stop tracking metadata about it.
                self._alloc_place_small_full(bbix)
            else:
                assert bbap < self._maxoix(nsz), \
                    "Allocation pointer beyond maximum"
                # just revise allocation pointer
                self._szbix2ap[nsz][bbix] = bbap

            res = self._bix2va(bbix) + (bbap - 1) * nsz
        else:
            # Large allocation.

            # Placement
            bsz = self._sz2nbucks(sz)
            bbix = self._alloc_place_large(stk, bsz, tidys)

            if __debug__:
                (pbase, psz, pv) = self._bix2state.get(bbix)
                assert pbase + psz >= bbix + bsz, "Large placement botch"

            # Enroll in WAIT state and map pages
            self._mark_allocated(bbix, bsz, BuckSt.WAIT)
            self._bix2szbm[bbix] = (sz, 0)
            res = self._bix2va(bbix)
            self._publish('mapd', stk, tid, res, res + self._nbucks2sz(bsz),
                          0b11)

            nsz = self._nbucks2sz(bsz)

        if __debug__: logging.debug("<_alloc eva=%x", res)
        return (res, nsz)

# --------------------------------------------------------------------- }}}
# Free ---------------------------------------------------------------- {{{

# Allow for parametrizable behavior when a bucket becomes free.  Should
# return one of
#   None  : leave the bucket considered allocated
#   True  : mark the bucket as JUNK
#   False : mark the bucket as TIDY immediately

    def _on_bucket_free(self, bix, bsz):
        return True
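
    # A subclass running without revocation might, for example, override this
    # to return False so that freed buckets go straight back to TIDY
    # (a hypothetical sketch):
    #
    #   def _on_bucket_free(self, bix, bsz):
    #       return False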

    # Mark a (span of) bucket(s) JUNK.
    #
    # This may change the largest revocable span, so carry out a single probe
    # of the state intervalmap to see.  Do not attempt to revise the cache
    # here, as that would require counting up the number of JUNK pages in the
    # span returned; just invalidate it and let the revocation heuristic
    # recompute it when needed.
    #
    # Junk spans are also tracked in a LRU cache; do the appropriate juggling
    # here.
    #
    # It may make sense to hook this method in subclasses, too, for further
    # metadata management, especially if we end up designing a "related
    # object" API extension: one may need to refer to metadata of objects
    # whose buckets have already gone from BUMP to WAIT, i.e., for which
    # _alloc_place_small_full() has already been called.
    def _mark_junk(self, bix, bsz):
        assert self._bix2state[bix][2] != BuckSt.JUNK, "re-marking JUNK"

        del self._bix2szbm[bix]
        self._bix2state.mark(bix, bsz, BuckSt.JUNK)
        self._njunkb += bsz
        self._nbwb -= bsz

        if self._brscache is not None:
            (brsnj, _, _) = self._brscache
            (_, qsz, _) = self._bix2state.get(bix, coalesce_with_values=st_tj)
            if qsz >= brsnj:
                self._brscache = None

        dll_im_coalesced_insert(bix, bsz, self._bix2state, self._junklru,
                                self._junkbdn)

    def _free_bix(self, bix, bsz):
        r = self._on_bucket_free(bix, bsz)
        if r == True: self._mark_junk(bix, bsz)
        elif r == False: self._mark_tidy(bix, bsz, 0)
        elif r is None: pass
        else: assert False, "Invalid return from _on_free_bix: %r" % r

    def _free(self, stk, tid, eva):
        if __debug__: logging.debug(">_free eva=%x", eva)
        if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts()

        # Look up existing allocation
        bix = self._va2bix(eva)
        b = self._bix2szbm[bix]

        # Sanity check state
        (spanbase, spansize, spanst) = (None, None, None)
        if __debug__:
            (spanbase, spansize, spanst) = self._bix2state.get(bix)
            assert (spanst == BuckSt.BUMP) or (spanst == BuckSt.WAIT), \
              ("Attempting to free in non-BUMP/WAIT bucket:", bix, spanst)

        (sz, bbm) = b
        if self._issmall(sz):
            # Small allocation.  Set bit in bitmask.
            boff = eva - self._bix2va(bix)
            assert boff % sz == 0, "Nonzero phase in small bucket"
            bitix = int(boff / sz)
            bitm = 1 << bitix
            assert bbm & bitm == 0, "Free of bitmask-free object"
            bbm |= bitm

            if spanst == BuckSt.BUMP:
                bbs = self._szbix2ap[sz]

                assert bix in bbs, ("Free in BUMP state but not any BUMP bucket", \
                    sz, bix, bbs)

                bbap = bbs[bix]

                assert bitix < bbap, ("Free in BUMP bucket beyond alloc ptr", \
                    sz, bix, bitix, bbap)

            if bbm == (1 << self._maxoix(sz)) - 1:
                # All objects now free; move bucket state
                assert bix not in self._szbix2ap.get(sz, {}), \
                  ("Freeing bucket still registered as bump block", \
                    bix, sz, self._bix2szbm[bix], self._szbix2ap.get(sz))
                assert spanst == BuckSt.WAIT, "Freeing bucket in non-WAIT state"
                self._free_bix(bix, 1)

                # XXX At the moment, we only unmap when the entire bucket is free.
                # This is just nwf being lazy and not wanting to do the bit math for
                # page-at-a-time release.
                self._publish('unmapd', stk, tid, self._bix2va(bix),
                              self._bix2va(bix + 1))

            else:
                # Just update
                self._bix2szbm[bix] = (sz, bbm)
        else:
            # Large allocation, retire all blocks to JUNK, UNMAP, and maybe revoke
            bsz = self._sz2nbucks(sz)

            assert spanst == BuckSt.WAIT, \
              ("Freeing large span in incorrect state", sz, spanst, bix, b, self._state_diag())
            assert spanbase <= bix and bix + bsz <= spanbase + spansize, \
              "Mismatched bucket states of large allocation"

            self._free_bix(bix, bsz)
            self._publish('unmapd', stk, tid, self._bix2va(bix),
                          self._bix2va(bix + bsz))
        if __debug__: logging.debug("<_free eva=%x", eva)

# --------------------------------------------------------------------- }}}
# Reallocation -------------------------------------------------------- {{{

# Since we sometimes allocate a bit more than we need, our realloc is
# potentially nontrivial.  We're being a little sloppy if we do this,
# tho', as we're reusing memory without revoking it.  We consider this
# acceptable, tho', because we presume that realloc does not change the
# type of the object nor its effective lifetime, and so even if the object
# temporarily shrinks and then expands, it's still the same object.
#
# If you're not convinced by the above, you're exactly the kind of person
# that --realloc=onlyshrink or --realloc=never are for!
#
# NB: When making an object smaller, this does not and MUST NOT transition
# the tail to JUNK state for pending reuse, because the capabilities we
# will have originally given out still span the whole region and so
# could come to overlap later allocations (if this allocation is not
# freed, so that they will not be subsets in the revocation test).  A real
# revoker may wish to scream loudly if it finds capabilities partially
# overlapping the revocation region (subsuming capabilities are presumably
# OK, as the allocator holds these).
#

    def _try_realloc_yes(self, stk, tid, oeva, nsz):
        if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts()

        # Find the size of the existing allocation
        bix = self._va2bix(oeva)
        b = self._bix2szbm[bix]

        # Sanity check state
        if __debug__:
            (spanbase, spansize, spanst) = self._bix2state.get(bix)
            assert (spanst == BuckSt.BUMP) or (spanst == BuckSt.WAIT), \
              "Attempting to realloc in non-BUMP/WAIT bucket"

        (osz, _) = b

        if nsz <= osz:
            if __debug__: logging.debug("<_try_realloc shrink eva=%x", oeva)
            # Shrinking is always fine, I suppose
            # Don't update the block size, even if it's not a bitmap bucket (which
            # can't be so updated anyway) and don't move anything to JUNK
            return True

        if self._issmall(osz):
            if __debug__: logging.debug("<_try_realloc small eva=%x", oeva)
            # Small allocation not growing by much.
            return self._issmall(nsz) and self._szfix(nsz) == osz
            # Unfortunately, even if the next small piece is free, it's not easy
            # to use it.  While we could grow into it and immediately mark it free
            # (relying on the non-freeness of the current allocation to prevent
            # freeing of the bucket, though this becomes more complicated with
            # page-at-a-time unmapping), subsequent reallocations would not only
            # not be able to do this trick but also fail to copy the additional
            # data, which would be really bad, since the size is derived from the
            # bucket metadata.

        # Large allocation getting larger.  If not much larger...
        if nsz <= self._nbucks2sz(self._sz2nbucks(osz)):
            if __debug__:
                logging.debug("<_try_realloc sm enlarging eva=%x (bix=%d) osz=%d nsz=%d %s",
                              self._bix2va(bix), bix, osz, nsz,
                              self._state_diag())
            self._bix2szbm[bix] = (nsz, 0)
            return True

        # It might happen that we have enough free spans ahead of us that we can
        # just gobble them.
        eix = bix + self._sz2nbucks(osz)

        (nextbase, nextsize, nextst) = self._bix2state.get(eix)
        if nextst not in {BuckSt.TIDY, BuckSt.AHWM}:
            if __debug__:
                logging.debug("<_try_realloc up against %s at eva=%x", nextst,
                              oeva)
            return False

        if nsz <= osz + self._nbucks2sz(nextsize):
            if __debug__:
                logging.debug("<_try_realloc enlarging eva=%x osz=%d nsz=%d",
                              self._bix2va(bix), osz, nsz)
            self._bix2szbm[bix] = (nsz, 0)
            self._mark_allocated(eix, self._sz2nbucks(nsz - osz), BuckSt.WAIT)
            self._publish('mapd', stk, tid,
                          self._nbucks2sz(bix + self._sz2nbucks(osz)), \
                          self._nbucks2sz(bix + self._sz2nbucks(nsz)), 0b11)
            return True

        return False

    # Permit in-place reallocation only if the resulting object is no larger
    # than the original allocation (rounded up to its bitmap bucket class; we
    # do not retain the requested size, so we cannot enforce a strict
    # shrinkage policy); see the caveat above for why this does not
    # transition any bytes to JUNK.
    def _try_realloc_onlyshrink(self, stk, tid, oeva, nsz):
        if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts()

        # Find the size of the existing allocation
        bix = self._va2bix(oeva)
        b = self._bix2szbm[bix]
        # Sanity check state
        if __debug__:
            (spanbase, spansize, spanst) = self._bix2state.get(bix)
            assert (spanst == BuckSt.BUMP) or (spanst == BuckSt.WAIT), \
              "Attempting to realloc in non-BUMP/WAIT bucket"
        (osz, _) = b
        return nsz <= osz

    def _try_realloc_never(self, stk, tid, oeva, nsz):
        # Don't bother with PARANOIA_STATE_PER_OPER since we're just going to
        # call down anyway

        return False

# --------------------------------------------------------------------- }}}
# Rendering ----------------------------------------------------------- {{{

    def occshade(self, szbm):
        (sz, bm) = szbm

        maxoix = self._maxoix(sz)
        pc = bin(bm).count("1")  # XXX YIKES
        i = ceil(256.0 * pc / maxoix) - 1
        return (i << 8) + ((255 - i) << 16)
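
    # For instance (a sketch with the default bucklog of 16 and a 16-byte
    # size class, so maxoix == 4096): a nearly empty bitmap shades red
    # (0xFF0000) and a completely full one shades green (0x00FF00).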

    def render(self, img):
        from common.render import renderSpansZ
        from PIL import ImageDraw

        zo = img.width.bit_length() << 1
        basebix = next(loc for (loc, _, _) in self._bix2state)

        # Paint most of the buckets; exclude AHWM since that's big
        renderSpansZ(
            img, zo,
            ((loc - basebix, sz, bst2color[st])
             for (loc, sz, st) in self._bix2state if st != BuckSt.AHWM))

        # Paint over small WAIT buckets with some occupancy information
        renderSpansZ(img, zo,
                     ((bix - basebix, 1, self.occshade(self._bix2szbm[bix]))
                      for (loc, sz, st) in self._bix2state if st == BuckSt.WAIT
                      for bix in range(loc, loc + sz)
                      if self._bix2szbm.get(bix, None) is not None
                      if self._issmall(self._bix2szbm[bix][0])))

        # Paint over the largest revokable span (which may hide some TIDY
        # blocks, but that's fine)
        brss = self._find_largest_revokable_spans(n=1)
        if brss != [] and brss[0][1] is not None:
            renderSpansZ(img, zo, [(brss[0][1] - basebix, brss[0][2], cBRS)])

        # Paint over the oldest JUNK span
        oldestj = self._junklru.first
        if oldestj is not None:
            (qbix, qsz, _) = self._bix2state.get(oldestj.value[0],
                                                 coalesce_with_values=st_tj)
            renderSpansZ(img, zo, [(qbix - basebix, qsz, cOJS)])

    def _render_expanded(self, img):
        from common.render import renderSpansZ
        from PIL import ImageDraw

        zo = img.width.bit_length() << 1
        basebix = next(loc for (loc, _, _) in self._bix2state)

        # bix and offset to pixel index
        def expand(bix, off):
            return (bix * 2**self._bucklog + off + 15) >> 4

        # render a bitmap bucket at block index offset (relative to basebix)
        def rendszbm(bio, ap, sz, bm):
            ep = self._maxoix(sz)

            if ap is None: ap = ep

            for oix in range(0, ap):
                renderSpansZ(
                    img, zo,
                    [(expand(bio, oix * sz),
                      expand(0, sz * (oix + 1) - 1) - expand(0, sz * oix),
                      bst2color[BuckSt.JUNK] if bm & 1 == 1
                      else bst2color[BuckSt.WAIT])])
                bm >>= 1

            renderSpansZ(img, zo,
                         [(expand(bio, ap * sz), expand(0, sz * ep) -
                           expand(0, sz * ap), bst2color[BuckSt.TIDY])])

        # Paint most of the buckets; exclude AHWM since that's big
        for (loc, sz, st) in self._bix2state:
            # skip AHWM
            if st == BuckSt.AHWM:
                continue
            # JUNK and TIDY are entirely uniform
            elif st == BuckSt.JUNK or st == BuckSt.TIDY:
                renderSpansZ(
                    img, zo,
                    [(expand(loc - basebix, 0), expand(sz, 0), bst2color[st])])
            # BUMP states are backed at every bix with a bitmap
            elif st == BuckSt.BUMP:
                for bix in range(loc, loc + sz):
                    (asz, bm) = self._bix2szbm[bix]
                    rendszbm(bix - basebix, self._szbix2ap[asz].get(bix, None),
                             asz, bm)
            # WAIT states are complicated: they are either backed with a bitmap
            # or by a large value, indicating the uniform occupancy of one or
            # more buckets.  We don't have better resolution than that, so just
            # render those uniformly.
            elif st == BuckSt.WAIT:
                bix = loc
                while bix < loc + sz:
                    (asz, bm) = self._bix2szbm[bix]
                    if self._issmall(asz):
                        # bitmap, one bucket
                        rendszbm(bix - basebix,
                                 self._szbix2ap[asz].get(bix, None), asz, bm)
                        bix += 1
                    else:
                        # large object
                        nsz = self._sz2nbucks(asz)
                        renderSpansZ(img, zo,
                                     [(expand(bix - basebix, 0),
                                       expand(nsz, 0), bst2color[st])])
                        bix += nsz
class TraditionalAllocatorBase(RenamingAllocatorBase, metaclass=ABCMeta):
    # Initialization ------------------------------------------------------ {{{

    __slots__ = (
        '_alignlog',  # Power of two default alignment
        '_alignmsk',  # Derived alignment mask
        '_minsize',  # Minimum allocation size
        '_paranoia',  # Self-tests
        '_tslam',  # fetch the current trace timestamp
        '_basepg',  # Bottom-most page index to use
        '_brscache',  # cached biggest revokable span
        '_eva2sst',  # Emulated Virtual Address to Segment STate
        '_eva2sz',  # EVA to size for outstanding allocations (WAIT states)
        '_evp2pst',  # Emulated Virtual Page to Page STate
        '_junklru',  # LRU queue of JUNK segments, by base address
        '_junkadn',  # JUNK segment base EVA to node in junklru
        '_njunk',  # Number of bytes JUNK
        '_nmapped',  # Number of bytes MAPD
        '_npend',  # Number of bytes in PEND state
        '_nwait',  # Number of bytes WAIT (allocated)
        '_pagelog',  # Base-2 log of page size
        '_tidylst',  # SegFreeList of TIDY spans
        '_wildern'  # Wilderness location
    )

    # Argument definition and response ------------------------------------ {{{
    @staticmethod
    def _init_add_args(argp):
        argp.add_argument('--paranoia', action='store', type=int, default=0)
        argp.add_argument('--min-size', action='store', type=int, default=16)
        argp.add_argument('--align-log', action='store', type=int, default=2)

    def _init_handle_args(self, args):
        self._alignlog = args.align_log
        self._alignmsk = (1 << args.align_log) - 1

        self._minsize = args.min_size

        self._paranoia = args.paranoia
        if self._paranoia == 0 and __debug__:
            logging.warning("Assertions still enabled, even with paranoia 0; "
                            "try python -O")
        if self._paranoia != 0 and not __debug__:
            raise ValueError("Paranoia without assertions will just be slow")

# --------------------------------------------------------------------- }}}

    def __init__(self, **kwargs):
        super().__init__()
        self._tslam = kwargs['tslam']
        self._paranoia = 0

        # Argument parsing ---------------------------------------------------- {{{

        argp = argparse.ArgumentParser()
        self._init_add_args(argp)
        self._init_handle_args(argp.parse_args(kwargs['cliargs']))

        # --------------------------------------------------------------------- }}}

        self._pagelog = 12
        self._basepg = 1
        baseva = self._basepg * 2**self._pagelog

        self._brscache = None
        self._eva2sst = IntervalMap(baseva, 2**64 - baseva, SegSt.AHWM)
        self._eva2sz = {}
        self._evp2pst = IntervalMap(self._basepg,
                                    2**(64 - self._pagelog) - self._basepg,
                                    PageSt.UMAP)
        self._junklru = dllist()
        self._junkadn = {}
        self._njunk = 0
        self._nmapped = 0
        self._npend = 0
        self._nwait = 0
        self._tidylst = SegFreeList(extcoal=self._sfl_coalesce)
        self._wildern = baseva

# --------------------------------------------------------------------- }}}
# Size-related utility functions -------------------------------------- {{{

    def _eva2evp(self, eva):
        return eva >> self._pagelog

    def _evp2eva(self, evp):
        return evp << self._pagelog

    def _eva2evp_roundup(self, eva):
        return (eva + (1 << self._pagelog) - 1) >> self._pagelog

    def _npg2nby(self, npg):
        return npg << self._pagelog

    def _eva_align_roundup(self, eva):
        return ((eva + self._alignmsk) >> self._alignlog) << self._alignlog
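
    # For example (a sketch assuming the default --align-log of 2, i.e.
    # 4-byte alignment): _eva_align_roundup(0x1001) == 0x1004 and
    # _eva_align_roundup(0x1004) == 0x1004.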

# --------------------------------------------------------------------- }}}
# Additional state assertions and diagnostics ------------------------- {{{

    def _state_asserts(self):

        # Ensure that our wilderness looks like the HWM
        (qbase, qsz, qv) = self._eva2sst[self._wildern]
        # "I'm sure it's around here somewhere"
        assert qbase + qsz == 2**64, ("wilderness lost", self._wildern, qbase,
                                      qsz, qv)
        # "no longer above high water mark"
        assert qv == SegSt.AHWM, ("wilderness flooded", self._wildern, qbase,
                                  qsz, qv)

        # All outstanding allocations are backed by WAIT and MAPD segments, yes?
        for a in self._eva2sz.keys():
            (qbase, qsz, qv) = self._eva2sst[a]
            assert qv == SegSt.WAIT, ("rude allocation", a, qv)  # not WAITing
            # segment too short for allocation
            assert qbase + qsz >= a + self._eva2sz[a], ("alloc overflow", a)

            (qbase, qsz, qv) = self._evp2pst[self._eva2evp(a)]
            assert qv == PageSt.MAPD, ("lost allocation", a, qv)  # "un-mapped"
            assert self._evp2eva(qbase) + self._npg2nby(qsz) >= a + self._eva2sz[a],\
              ("partially lost allocation", a, self._eva2sz[a], qbase, qsz)

        # All JUNK queue entries are backed by JUNK segments
        for (jb, jsz) in self._junklru:
            (qb, qsz, qv) = self._eva2sst[jb]
            assert jb == qb and jsz == qsz and qv == SegSt.JUNK, \
                   ("JUNK list state mismatch", (jb, jsz), (qb, qsz, qv))
            assert jb in self._junkadn, "JUNK node not in index"
            assert (jb, jsz) == self._junkadn[jb].value, "JUNK index botch"

        for jb in self._junkadn:
            assert self._junkadn[jb].value[0] == jb
            jsz = self._junkadn[jb].value[1]
            (qb, qsz, qv) = self._eva2sst[jb]
            assert jb == qb and jsz == qsz and qv == SegSt.JUNK, \
                   ("JUNK list state mismatch", (jb, jsz), (qb, qsz, qv))

        # All TIDY list entries are backed by TIDY segments, and the SegFL is OK
        for (tb, tsz) in self._tidylst.iterlru():
            (qb, qsz, qv) = self._eva2sst[tb]
            assert tb == qb and tsz == qsz and qv == SegSt.TIDY, \
                   ("TIDY list state mismatch", (tb, tsz), (qb, qsz, qv))

        self._tidylst.crossreference_asserts()

        # All WAIT spans are covered by allocations, all JUNK and TIDY spans
        # correspond with entries in their queues
        nwait = 0
        njunk = 0
        npend = 0
        for (qb, qsz, qv) in self._eva2sst:
            if qv == SegSt.WAIT:
                nwait += qsz
                ab = qb
                while ab < qb + qsz:
                    asz = self._eva2sz.get(ab, None)
                    assert asz is not None, ("WAIT w/o alloc sz", qb, ab)
                    ab += asz
                assert ab == qb + qsz, "Allocations overrun WAIT segment?"
            elif qv == SegSt.TIDY:
                assert qsz == self._tidylst.peek(qb)
            elif qv == SegSt.JUNK:
                njunk += qsz
                dln = self._junkadn.get(qb, None)
                assert dln is not None
                assert dln.value == (qb, qsz)
            elif qv == SegSt.AHWM:
                assert qb == self._wildern, "There must be only one final frontier"
            elif qv == SegSt.PEND:
                npend += qsz
        assert nwait == self._nwait, ("Improper account of WAIT bytes", nwait,
                                      self._nwait)
        assert njunk == self._njunk, ("Improper account of JUNK bytes", njunk,
                                      self._njunk)
        assert npend == self._npend, ("Improper account of PEND bytes", npend,
                                      self._npend)

        # All MAPD segments have some reason to be mapped?  Well, maybe not
        # exactly, since we are lazy about unmapping, or might be.
        #
        ## for (mb, msz, mv) in self._eva2pst :
        ##     if mv != PageSt.MAPD : continue
        ##     for (qb, qsz, qv) in self._eva2sst[mb:mb+msz] :
        ##         if qv == SegSt.WAIT : break
        ##     else : assert False, ("MAPD w/o WAIT", mb, msz)

# --------------------------------------------------------------------- }}}
# Revocation logic ---------------------------------------------------- {{{

    def _sfl_coalesce(self, va):
        (qva, qsz, _) = self._eva2sst.get(va)
        return (qva, qsz)

    # Mark a span TIDY.  This must not be used to re-mark any existing TIDY
    # span.
    #
    # Inserts the coalesced span at the end of tidylst.
    def _mark_tidy(self, loc, sz):
        self._eva2sst.mark(loc, sz, SegSt.TIDY)
        self._tidylst.insert(loc, sz)

    def _mark_revoked(self, loc, sz):
        self._mark_tidy(loc, sz)

    # An actual implementation would maintain a prioqueue or something;
    # we can get away with a linear scan.  We interrogate the segment state
    # interval map for ease of coalescing, even though we also maintain a
    # parallel JUNK LRU queue.  Returns spans as (njunk, base, size) triples,
    # coalescing JUNK and TIDY segments together.
    def _find_largest_revokable_spans(self, n=1):
        if n == 0: return
        if n == 1 and self._brscache is not None:
            return [self._brscache]

        bests = [(0, -1, -1)]  # [(njunk, loc, sz)] in ascending order
        for (qbase, qsz,
             qv) in self._eva2sst.iter_vfilter(None, self._wildern, sst_tj):

            # smaller spans don't interest us
            if qsz <= bests[0][0]: continue

            # Reject spans that are entirely TIDY already.
            js = [
                sz for (_, sz, v) in self._eva2sst[qbase:qbase + qsz]
                if v == SegSt.JUNK
            ]
            if js == []: continue

            # Sort spans by number of JUNK bytes, not JUNK|TIDY bytes
            nj = sum(js)
            if nj <= bests[0][0]: continue
            insort(bests, (nj, qbase, qsz))

            bests = bests[(-n):]

        # Go ahead and set this now, even though it's likely we're about to
        # use this span in revocation and, so, invalidate this cache.  Still,
        # if we don't, so much the better, yeah?
        self._brscache = bests[-1]

        return [best for best in bests if best[1] >= 0]

    def _do_revoke(self, ss):
        if self._paranoia > PARANOIA_STATE_ON_REVOKE: self._state_asserts()

        self._brscache = None

        for (nj, loc, sz) in ss:
            self._njunk -= nj
            # Because we coalesce with TIDY spans while revoking, there may be
            # several JUNK spans in here.  Go remove all of them from the LRU.
            for (qb, qsz, qv) in self._eva2sst[loc:loc + sz]:
                assert qv in sst_tj, "Revoking non-revokable span"
                if qv == SegSt.JUNK:
                    self._junklru.remove(self._junkadn.pop(qb))
                    self._mark_revoked(qb, qsz)

        self._publish('revoked', "---", "",
                      *((loc, loc + sz) for (_, loc, sz) in ss))

    def _do_revoke_best_and(self, n=None, revoke=[]):

        revs = list(revoke)
        assert len(revs) <= self._revoke_k, (revoke)

        if n is None:
            n = self._revoke_k

        nrev = None
        brss = self._find_largest_revokable_spans(n=n + 1)

        rset = set()
        for rloc in revs:
            for (brnj, brloc, brsz) in brss:
                if brloc <= rloc < brloc + brsz:
                    rset.add((brnj, brloc, brsz))
                    break
            else:
                (qloc, qsz,
                 qv) = self._eva2sst.get(rloc, coalesce_with_values=sst_tj)
                rset.add((sum([
                    sz for (_, sz, v) in self._eva2sst[qloc:qloc + qsz]
                    if v == SegSt.JUNK
                ]), qloc, qsz))
        while len(rset) <= n and brss != []:
            rset.add(brss[-1])
            brss = brss[:-1]

        self._do_revoke(rset)

        while brss != []:
            if brss[-1] not in rset: break
            brss = brss[:-1]
        if brss != []: self._brscache = brss[-1]
        else: self._brscache = (0, -1, -1)

    @abstractmethod
    def _maybe_revoke(self):
        pass
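
    # A hypothetical subclass (which must also define self._revoke_k, as
    # _do_revoke_best_and above expects) might, as a sketch, revoke whenever
    # JUNK bytes outnumber WAIT bytes:
    #
    #   def _maybe_revoke(self):
    #       if self._njunk > 0 and self._njunk >= self._nwait:
    #           self._do_revoke_best_and()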

# --------------------------------------------------------------------- }}}
# Allocation ---------------------------------------------------------- {{{

    def _alloc_place(self, stk, sz):
        # XXX Approximate best-fit / oldest-fit strategy, since coalesced
        # entries are moved to the back of the tidy list.
        #
        # Note the requirement to either fit exactly or leave some threshold
        # of bytes available. (XXX but it's not quite the right test, is it?)
        #
        for (pos, psz) in self._tidylst.iterfor(sz, 1 << self._alignlog):
            apos = self._eva_align_roundup(pos)
            if apos == pos: return pos
            elif pos + psz >= apos + sz: return apos
        return self._eva_align_roundup(self._wildern)

    def _ensure_mapped(self, stk, tid, reqbase, reqsz):
        pbase = self._eva2evp(reqbase)
        plim = self._eva2evp(reqbase + reqsz - 1) + 1
        for (qb, qsz, qv) in self._evp2pst[pbase:plim]:
            if qv == PageSt.MAPD: continue

            b = max(qb, pbase)
            l = min(qb + qsz, plim)

            self._nmapped += self._npg2nby(l - b)
            self._publish('mapd', stk, tid, self._evp2eva(b), self._evp2eva(l),
                          0b11)
        self._evp2pst.mark(pbase, plim - pbase, PageSt.MAPD)

    # When marking a span allocated, we may have residual TIDY segments left
    # over.  Because overriding implementations may be tracking their own
    # metadata about TIDY spans, we provide this hook for intercepting without
    # having to duplicate all the work done in _mark_allocated.  Unlike
    # _mark_tidy, these spans are already marked TIDY; they are just not in
    # the TIDY metadata structures.
    def _mark_allocated_residual(self, stk, loc, sz, isLeft):
        self._tidylst.insert(loc, sz)
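
    # A subclass sketch (hypothetical; _myresidual_log is not a field of this
    # base class): record residual spans before deferring to the default
    # behaviour above:
    #
    #   def _mark_allocated_residual(self, stk, loc, sz, isLeft):
    #       self._myresidual_log.append((loc, sz, isLeft))
    #       super()._mark_allocated_residual(stk, loc, sz, isLeft)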

    def _mark_allocated(self, stk, reqbase, reqsz):
        if self._paranoia > PARANOIA_STATE_PER_OPER:
            (qbase, qsz, qv) = self._eva2sst.get(reqbase,
                                                 coalesce_with_values=sst_at)
            assert qv in sst_at, ("New allocated mark in bad state", \
              (reqbase, reqsz), (qbase, qsz, qv), list(self._eva2sst))
            assert qbase + qsz >= reqbase + reqsz, "New allocated undersized?"

        # Remove span from tidy list; may create two more entries.
        # No need to use the coalescing insert functionality here because we
        # know, inductively, that we certainly won't coalesce in either direction.
        #
        # XXX We act as though any residual spans have been just created; is
        # that the right policy?
        #
        # XXX Don't create segments less than the minimum allocation size, as
        # there's no possible utility to them and we'll catch them
        # post-coalescing in _mark_tidy.  This change will require modification
        # to our asserts and sanity checking, too.
        if reqbase < self._wildern:
            (qb, qsz, qv) = self._eva2sst[reqbase]
            assert qv == SegSt.TIDY
            assert qsz >= reqsz
            tsz = self._tidylst.remove(qb)
            assert tsz == qsz

            # Do the marking now, so that our work on our tidy list sees the
            # correct (lack of) coalescing hereafter, but above we wanted to find
            # the whole TIDY span.  The duplication with the else branch below is
            # a little sad. :/
            self._eva2sst.mark(reqbase, reqsz, SegSt.WAIT)

            if qb + qsz != reqbase + reqsz:
                # Insert residual right span
                self._mark_allocated_residual(stk, reqbase + reqsz,
                                              qb + qsz - reqbase - reqsz,
                                              False)
            if reqbase != qb:
                # Insert residual left span
                self._mark_allocated_residual(stk, qb, reqbase - qb, True)
        else:
            # Homesteading beyond the wilderness frontier leaves a TIDY gap
            if reqbase > self._wildern:
                self._mark_tidy(self._wildern, reqbase - self._wildern)
            self._eva2sst.mark(reqbase, reqsz, SegSt.WAIT)

        # If the allocation takes place within the current best revokable span,
        # invalidate the cache and let the revocation heuristic reconstruct it.
        if self._brscache is not None:
            (_, brsix, brssz) = self._brscache
            if brsix <= reqbase < brsix + brssz:
                self._brscache = None

        self._nwait += reqsz
        self._wildern = max(self._wildern, reqbase + reqsz)

    def _alloc(self, stk, tid, sz):
        if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts()

        if sz < self._minsize: sz = self._minsize  # minimum size
        sz = (sz + self._alignmsk) & ~self._alignmsk  # and alignment

        loc = self._alloc_place(stk, sz)

        assert loc & self._alignmsk == 0

        self._ensure_mapped("malloc " + stk, tid, loc, sz)
        self._mark_allocated(stk, loc, sz)
        self._eva2sz[loc] = sz
        return (loc, sz)

# --------------------------------------------------------------------- }}}
# Free ---------------------------------------------------------------- {{{

    def _ensure_unmapped(self, stk, tid, loc, sz):
        pbase = self._eva2evp_roundup(loc)
        plim = self._eva2evp(loc + sz - 1)
        if pbase == plim: return  # might not be an entire page

        for (qb, qsz, qv) in self._evp2pst[pbase:plim]:
            if qv == PageSt.UMAP: continue

            b = max(qb, pbase)
            l = min(qb + qsz, plim)

            self._nmapped -= self._npg2nby(l - b)
            self._publish('unmapd', stk, tid, self._evp2eva(b),
                          self._evp2eva(l))
        self._evp2pst.mark(pbase, plim - pbase, PageSt.UMAP)

    # When exiting the WAIT state, there are multiple ways things can go:
    #
    #   PEND: for some reason, this span of memory is neither reusable nor
    #         revokable.  One assumes that eventually this will no longer
    #         be true and so we will see PEND -> {TIDY, JUNK} transitions
    #         here, too.
    #
    #   TIDY: This memory does not need to be run through a revocation pass.
    #         Either we are running in an unsafe mode or there is some other
    #         mechanism available, such as fast pointer invalidation
    #         (MTE/SSM).
    #
    #  JUNK: This memory needs to be revoked before it can be reused.
    #
    # This function handles all of the associated logic.  The lists given
    # should not contain coalescable regions for efficiency's sake, but I do
    # not think anything will go wrong if they do.
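    #
    # For instance, _free and _free_unsafe below call this as
    # _mark_free(stk, tid, [], [], [(loc, sz)]) and
    # _mark_free(stk, tid, [], [(loc, sz)], []) respectively; a hypothetical
    # deferred-free subclass might instead route spans through `pends` first.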
    def _mark_free(self, stk, tid, pends, tidys, junks):

        for (loc, sz) in pends:
            self._nwait -= sz
            self._npend += sz
            self._eva2sst.mark(loc, sz, SegSt.PEND)

        for (loc, sz) in tidys:
            self._mark_free_accounting_helper(loc, sz)
            self._mark_tidy(loc, sz)

            (qb, qsz, qv) = self._eva2sst.get(loc)
            assert qv == SegSt.TIDY, (loc, sz, qb, qsz, qv,
                                      list(self._eva2sst))
            self._mark_free_unmap_helper(stk, tid, qb, qsz)

        for (loc, sz) in junks:

            self._mark_free_accounting_helper(loc, sz)
            self._eva2sst.mark(loc, sz, SegSt.JUNK)
            self._njunk += sz

            # If it happens that this span may be larger than the cached largest
            # revokable span, invalidate the cache
            if self._brscache is not None:
                (brsnj, _, _) = self._brscache
                (_, qsz, _) = self._eva2sst.get(loc,
                                                coalesce_with_values=sst_tj)
                if qsz >= brsnj:
                    self._brscache = None

            # Update the JUNK LRU
            (qb, qsz) = dll_im_coalesced_insert(loc, sz, self._eva2sst,
                                                self._junklru, self._junkadn)

            self._mark_free_unmap_helper(stk, tid, qb, qsz)

    def _mark_free_accounting_helper(self, loc, sz):
        for (qb, qsz, qv) in self._eva2sst[loc:loc + sz]:
            lim = min(qb + qsz, loc + sz)
            qb = max(loc, qb)
            qsz = lim - qb

            if qv == SegSt.PEND: self._npend -= qsz
            elif qv == SegSt.WAIT: self._nwait -= qsz
            else: assert False

    # If the span is large enough, go ensure that it is unmapped, save
    # possibly for some material on either side.
    # XXX configurable policy
    def _mark_free_unmap_helper(self, stk, tid, qb, qsz):
        if qsz > (16 * 2**self._pagelog):
            self._ensure_unmapped("free " + stk, tid, qb, qsz)

    def _free(self, stk, tid, loc):
        if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts()
        assert self._eva2sst[loc][2] == SegSt.WAIT, "free non-WAIT?"

        # Mark this span as junk
        sz = self._eva2sz.pop(loc)
        self._mark_free(stk, tid, [], [], [(loc, sz)])

    def _free_unsafe(self, stk, tid, loc):
        if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts()
        assert self._eva2sst[loc][2] == SegSt.WAIT, "free non-WAIT?"

        # Immediately mark this span as TIDY, rather than JUNK
        sz = self._eva2sz.pop(loc)
        self._mark_free(stk, tid, [], [(loc, sz)], [])

# --------------------------------------------------------------------- }}}
# Realloc ------------------------------------------------------------- {{{

    def _try_realloc(self, stk, tid, oeva, nsz):
        # XXX
        return False

# --------------------------------------------------------------------- }}}
# Rendering ----------------------------------------------------------- {{{

    def render(self, img):
        from common.render import renderSpansZ
        from PIL import ImageDraw

        sst2color = {
            SegSt.TIDY: 0xFFFFFF,
            SegSt.WAIT: 0x00FF00,
            SegSt.JUNK: 0xFF0000,
        }
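        # Legend: TIDY renders white, WAIT green, JUNK red; the oldest JUNK and
        # TIDY spans are overpainted magenta and cyan below.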

        baseva = self._basepg * 2**self._pagelog

        zo = img.width.bit_length() << 1

        renderSpansZ(
            img, zo,
            (((loc - baseva) >> self._alignlog, sz >> self._alignlog,
              sst2color[st])
             for (loc, sz, st) in self._eva2sst.irange(baseva, self._wildern)))

        # Paint over the oldest JUNK span
        oldestj = self._junklru.first
        if oldestj is not None:
            (qb, qsz) = oldestj.value
            qb -= baseva
            renderSpansZ(
                img, zo,
                [(qb >> self._alignlog, qsz >> self._alignlog, 0xFF00FF)])

        # Paint over the oldest TIDY span
        oldestt = self._tidylst.eldest()
        if oldestt is not None:
            (qb, qsz) = oldestt
            qb -= baseva
            renderSpansZ(
                img, zo,
                [(qb >> self._alignlog, qsz >> self._alignlog, 0x00FFFF)])
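
# A minimal sketch of driving render() above, assuming the simulation harness
# has already constructed an allocator instance `alloc` and that renderSpansZ
# is happy with a square RGB canvas:
#
#   from PIL import Image
#   img = Image.new('RGB', (512, 512))
#   alloc.render(img)
#   img.save('heap.png')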
Example #4
0
class Allocator(TraditionalAllocatorBase):

    __slots__ = (
        '_mtags',  # Memory tag version intervalmap
        '_nvers',  # Number of versions
        '_revoke_all',  # Concurrency is infinitely fast; revoke all free spans
        '_revoke_jwr',  # JUNK/WAIT for revocation
        '_revoke_k',  # Limited revocation facilities
        '_revoke_lru',  # With revoke_k, sample from junklru, too.
        '_revoke_min',  # Suppress revocations reclaiming fewer JUNK bytes
    )

    @staticmethod
    def _init_add_args(argp):
        super(__class__, __class__)._init_add_args(argp)
        argp.add_argument('--versions', action='store', type=int, default=16)
        argp.add_argument(
            '--revoke-min',
            action='store',
            type=int,
            default=0,
            help="Suppress revocations reclaiming fewer JUNK bytes")
        argp.add_argument('--revoke-factor',
                          action='store',
                          type=float,
                          default=None,
                          help="Ratio of JUNK to WAIT triggering revocation")
        argp.add_argument('--revoke-k',
                          action='store',
                          type=int,
                          default=None,
                          help="Assume limited revocation facilities")
        argp.add_argument(
            '--revoke-lru',
            action='store',
            type=int,
            default=None,
            help="Ensure old maximal-versioned spans eventually recycled")
        argp.add_argument('--revoke-sort',
                          action='store',
                          type=str,
                          default="clock",
                          choices=["clock", "size"],
                          help="Selection function for limited revocation")
        argp.add_argument(
            '--revoke-all-colors',
            action='store_true',
            default=False,
            help="Revoke all free spans, not just maximal-versioned")

    def _init_handle_args(self, args):
        super(__class__, self)._init_handle_args(args)
        self._nvers = args.versions
        self._revoke_all = args.revoke_all_colors
        self._revoke_jwr = args.revoke_factor
        self._revoke_min = args.revoke_min
        self._revoke_k = args.revoke_k
        self._revoke_lru = args.revoke_lru
        if self._revoke_lru is not None:
            if self._revoke_k is None: self._revoke_k = self._revoke_lru
            else: assert self._revoke_k >= self._revoke_lru
        else:
            self._revoke_lru = 0

        if args.revoke_sort == "clock":
            self._find_largest_revokable_spans = self._find_clockiest_revokable_spans
        elif args.revoke_sort == "size":
            pass

        if args.revoke_k is None and args.revoke_lru is not None:
            raise ValueError("--revoke-lru only sensible with --revoke-k")

        if args.revoke_factor is None and args.revoke_min == 0:
            raise ValueError(
                "Please restrain revocation with --revoke-factor or --revoke-min"
            )

    def __init__(self, *args, **kwargs):
        super(__class__, self).__init__(*args, **kwargs)

        self._mtags = IntervalMap(self._evp2eva(self._basepg),
                                  2**64 - self._evp2eva(self._basepg), 0)

    def _state_asserts(self):
        super(__class__, self)._state_asserts()

        # All JUNK spans are at the maximal version, and all non-JUNK spans are
        # at other versions.
        ist = (x for x in self._eva2sst)
        itg = (x for x in self._mtags)
        for (qb, qsz, qvs) in im_stream_inter(lambda _: ist, lambda _: itg):
            (qst, qtv) = qvs
            if qst == SegSt.JUNK: assert qtv == self._nvers, (qb, qsz, qtv)
            elif qst == SegSt.AHWM: assert qtv == 0
            else: assert qtv != self._nvers

    # XXX
    # We'd like to ask a slightly different question, namely: where can we
    # place this to minimize the advancement of the version clocks.  You might
    # think we'd never advance in allocate, having advanced in free, but if we
    # choose a place formed from several freed spans, we have to advance to the
    # max of all spans we end up using, which might advance some of the clocks
    # quite a bit.
    # ...
    # At the moment, tidylst coalesces all versions together.  We should
    # instead hunt for a minimum of the (byte*clock_delta) sum for the places
    # considered.  We can stop early if we find a zero, of course.
    #
    def _alloc_place(self, stk, sz):
        pos = super()._alloc_place(stk, sz)
        nv = max(v for (_, __, v) in self._mtags[pos:pos + sz])
        assert nv != self._nvers
        self._mtags.mark(pos, sz, nv)
        return pos
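
    # For example, if the chosen placement overlaps spans currently at versions
    # 2 and 5, the whole allocation is repainted to version 5 (their max), so a
    # single version tag describes the entire object.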

    def _free(self, stk, tid, loc):
        sz = self._eva2sz[loc]
        (_, __, v) = self._mtags.get(loc)
        if v == self._nvers - 1:
            super(__class__, self)._free(stk, tid, loc)
            self._mtags.mark(loc, sz, self._nvers)
        else:
            super(__class__, self)._free_unsafe(stk, tid, loc)
            self._mtags.mark(loc, sz, v + 1)
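
    # With the default --versions 16, a span freed while already at version 15
    # is pushed to the sentinel version 16 and becomes JUNK (it must be revoked
    # before reuse); at any lower version it is bumped by one and goes straight
    # back to TIDY via _free_unsafe.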

    def _mark_revoked(self, loc, sz):
        super()._mark_revoked(loc, sz)
        self._mtags.mark(loc, sz, 0)

    # Sort by the sum of (sz*version), as that is the value by which we wind
    # back the clock to defer later revocations.
    def _find_clockiest_revokable_spans(self, n=1):
        if n == 0: return []

        # [(clocksum, njunk, loc, sz)] in ascending order
        bests = [(0, 0, -1, -1)]
        for (qbase, qsz,
             qv) in self._eva2sst.iter_vfilter(None, self._wildern, sst_tj):

            clocksum = 0
            for (vbase, vsz, vv) in self._mtags[qbase:qbase + qsz]:
                # Don't walk off the end of the last segment
                vsz = min(vsz, qbase + qsz - vbase)
                clocksum += vsz * vv

            if clocksum <= bests[0][0]: continue

            # For internal accounting, also accumulate the number of JUNK bytes
            nj = sum([
                sz for (_, sz, v) in self._eva2sst[qbase:qbase + qsz]
                if v == SegSt.JUNK
            ])
            insort(bests, (clocksum, nj, qbase, qsz))

            bests = bests[(-n):]

        return [best[1:] for best in bests if best[2] >= 0]
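
    # The clock sum weights each byte by its current version: e.g., a 4 KiB
    # span sitting at version 3 contributes 4096 * 3 = 12288, which is how much
    # revoking it (resetting those bytes to version 0) winds the clocks back.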

    # Revocation here does not have a fixed number of windows on which it can
    # operate; everything in the junk (and tidy!) lists can be revoked in a
    # single go.  In implementation, this looks like validating that every
    # capability's contained version field matches the version painted in RAM.
    #
    # The limits of the allocator behavior range from being able to actually
    # get all free spans, regardless of their version (JUNK or TIDY), to being
    # able to reclaim just the ones that were already at the maximal version
    # (i.e. JUNK).  In practice, one could imagine the allocator maintaining
    # a "free epoch" bit and reclaiming (i.e., restoring to version 0) all
    # segments whose free epoch predates the current, now-ending sweep.
    #
    def _maybe_revoke(self):
        # Not above ratio threshold
        if self._revoke_jwr is not None \
           and self._njunk < self._revoke_jwr * self._nwait:
            return
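        # (e.g., with --revoke-factor 0.5, revocation proceeds only once JUNK
        # bytes amount to at least half of the WAIT bytes)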

        if self._njunk < self._revoke_min:
            return

        if self._revoke_k is None:
            it = ((jsz, jb, jsz) for (jb, jsz) in self._junklru)
            if self._revoke_all:
                it = itertools.chain(
                    it,
                    ((0, tb, tsz) for (tb, tsz) in self._tidylst.iterlru()))
        else:
            # Allocate room for up to _revoke_lru things from the JUNK LRU.
            nlru = min(self._revoke_lru, len(self._junklru))

            # XXX I'd love to, but boy is this a slow way to do it.
            #
            ## # Estimate reclaim quantity
            ## unjunk = self._find_largest_revokable_spans(n=self._revoke_k-nlru)
            ## unjunk = sum(nj for (nj, _, __) in unjunk)
            ## if unjunk < self._revoke_min:
            ##   return
            #
            # This is going to be much faster due to the brscache.
            #
            unjunk = self._find_largest_revokable_spans(n=1)
            if len(unjunk) * self._revoke_k < self._revoke_min:
                return

            # Do that again, but as part of the set of things to push to the revoker
            it = (x for x in self._find_largest_revokable_spans(
                n=self._revoke_k - nlru))

            # Add from the LRU JUNK queue
            it = itertools.chain(it, ((jsz, jb, jsz)
                                      for (jb, jsz) in self._junklru))

            # XXX Could also pull from the TIDY queue, but should filter by those
            # that contain nonzero versions or sort by the clockiest span or
            # something.
            #
            # XXX Should also coalesce with TIDY spans for the things we pull from
            # the LRU queue.

            # Limit to the number we actually can run
            # XXX This should deduplicate before slicing.  Sigh.
            #
            it = itertools.islice(it, self._revoke_k)

        self._do_revoke(list(it))
class Allocator(TraditionalAllocatorBase):

    __slots__ = (
        '_mtags',  # Memory tag version intervalmap
        '_nvers',  # Number of versions
        '_revoke_all',  # Concurrency is infinitely fast; revoke all free spans
        '_revoke_jwr',  # JUNK/WAIT for revocation
        '_revoke_k',  # Limited revocation facilities
        '_revoke_lru',  # With revoke_k, sample from junklru, too.
        '_revoke_front',  # Put revoked regions at the head of the TIDY list
        '_revoke_jmin',  # Suppress revocation unless this much JUNK accumulated
        '_revoke_tmax',  # Suppress revocation if more than this much TIDY already
        '_prefs',  # va -> Preferred stack flavor
        '_tlpf',  # TIDY List Per Flavor (flavor -> SegFreeList)
        '_stkfaf',  # Stack flavor allocation factor
    )

    @staticmethod
    def _init_add_args(argp):
        super(__class__, __class__)._init_add_args(argp)
        argp.add_argument('--versions', action='store', type=int, default=16)
        argp.add_argument(
            '--revoke-min',
            action='store',
            type=int,
            default=0,
            help="Suppress revocations reclaiming fewer JUNK bytes")
        argp.add_argument('--revoke-max-tidy',
                          action='store',
                          type=int,
                          default=None,
                          help="Suppress revocations if sufficient TIDY bytes")
        argp.add_argument('--revoke-factor',
                          action='store',
                          type=float,
                          default=None,
                          help="Ratio of JUNK to WAIT triggering revocation")
        argp.add_argument('--revoke-k',
                          action='store',
                          type=int,
                          default=None,
                          help="Assume limited revocation facilities")
        argp.add_argument(
            '--revoke-lru',
            action='store',
            type=int,
            default=None,
            help="Ensure old maximal-versioned spans eventually recycled")
        argp.add_argument('--revoke-sort',
                          action='store',
                          type=str,
                          default="clock",
                          choices=["clock", "size"],
                          help="Selection function for limited revocation")
        argp.add_argument(
            '--revoke-all-colors',
            action='store_true',
            default=False,
            help="Revoke all free spans, not just maximal-versioned")
        argp.add_argument(
            '--flavor-open-factor',
            action='store',
            type=int,
            default=1024,
            help="Scale factor when opening a flavored heap region")
        argp.add_argument('--revoke-front',
                          action='store',
                          # argparse's type=bool treats any non-empty string as
                          # True, so parse the flag value explicitly.
                          type=lambda v: v.lower() not in ('false', '0', 'no'),
                          default=True,
                          help="Front revoked spans on the TIDY queue")

    def _init_handle_args(self, args):
        super(__class__, self)._init_handle_args(args)
        self._nvers = args.versions
        self._revoke_all = args.revoke_all_colors
        self._revoke_jwr = args.revoke_factor
        self._revoke_jmin = args.revoke_min
        self._revoke_tmax = args.revoke_max_tidy
        self._revoke_k = args.revoke_k
        self._revoke_lru = args.revoke_lru
        self._revoke_front = args.revoke_front
        self._stkfaf = args.flavor_open_factor
        if self._revoke_lru is not None:
            if self._revoke_k is None: self._revoke_k = self._revoke_lru
            else: assert self._revoke_k >= self._revoke_lru
        else:
            self._revoke_lru = 0

        if args.revoke_sort == "clock":
            self._find_largest_revokable_spans = self._find_clockiest_revokable_spans
        elif args.revoke_sort == "size":
            pass

        if args.revoke_k is None and args.revoke_lru is not None:
            raise ValueError("--revoke-lru only sensible with --revoke-k")

        if args.revoke_factor is None and args.revoke_min == 0:
            raise ValueError(
                "Please restrain revocation with --revoke-factor or --revoke-min"
            )

    def __init__(self, *args, **kwargs):
        super(__class__, self).__init__(*args, **kwargs)

        self._mtags = IntervalMap(self._evp2eva(self._basepg),
                                  2**64 - self._evp2eva(self._basepg), 0)
        self._prefs = IntervalMap(self._evp2eva(self._basepg),
                                  2**64 - self._evp2eva(self._basepg), None)
        self._tlpf = {}
        self._tlpf[None] = SegFreeList()

    def _state_asserts(self):
        super(__class__, self)._state_asserts()

        #### Stack flavors:

        # For each flavored TIDY list, check that...
        for stk in self._tlpf.keys():
            # ... The list itself is OK
            self._tlpf[stk].crossreference_asserts()

            # ... and every element ...
            for (pos, sz) in self._tlpf[stk].iterlru():
                # ... is of the correct flavor
                (pbase, psz, pv) = self._prefs[pos]
                assert pv == stk, ("Mixed-flavor TIDY", (pos, sz, stk),
                                   (pbase, psz, pv))
                assert pbase + psz >= pos + sz, (
                    "More flavored TIDY than preferred", (pos, sz,
                                                          stk), (pbase, psz))

                # ... is actually tidy
                (qbase, qsz, qv) = self._eva2sst[pos]
                assert qv == SegSt.TIDY, ("Flavored TIDY is not TIDY",
                                          (pos, sz, stk), (qbase, qsz, qv))
                assert qbase + qsz >= pos + sz, (
                    "More flavor TIDY than TIDY", (pos, sz, stk),
                    (qbase, qsz), self._eva2sst[qbase + qsz],
                    list(self._tlpf[stk].iterlru()))

                # ... and really does end where it's supposed to: at the next
                # preference or TIDY boundary
                assert pos + sz == min(pbase + psz, qbase + qsz), \
                       ("TIDY segment length mismatch", (pos, sz, stk), (psz, pv), (qsz, qv))

        # Check that all TIDY segments with flavor are on the appropriate flavored TIDY list
        fif = lambda start: (
            (loc, sz, v) for (loc, sz, v) in self._prefs[start:])
        tif = lambda start: ((loc, sz, v)
                             for (loc, sz, v) in self._eva2sst[start:]
                             if v == SegSt.TIDY)
        for (loc, sz, (stk, _)) in im_stream_inter(fif, tif):
            fdns = self._tlpf[stk].adns.get(loc, None)
            assert fdns is not None, \
                ("Unindexed flavored TIDY span", (loc, sz, stk),
                    self._prefs[loc], self._eva2sst[loc],
                    self._tlpf[stk].adns)
            assert (loc, sz) == fdns[0].value, \
                ("Flavored TIDY index mismatch", (loc, sz, stk), fdns[0].value)

        #### MTE:

        # All JUNK spans are at the maximal version, and all non-JUNK spans are
        # at other versions.  Please note that this test is exceptionally slow,
        # due to the large number of segments and color spans that build up, and
        # so it is here but further gated.
        if self._paranoia > 2:
            ist = (x for x in self._eva2sst)
            itg = (x for x in self._mtags)
            for (qb, qsz,
                 (qst, qtv)) in im_stream_inter(lambda _: ist, lambda _: itg):
                if qst == SegSt.JUNK: assert qtv == self._nvers, (qb, qsz, qtv)
                elif qst == SegSt.AHWM: assert qtv == 0
                else: assert qtv != self._nvers

    # XXX
    # We'd like to ask a slightly different question, namely: where can we
    # place this to minimize the advancement of the version clocks.  You might
    # think we'd never advance in allocate, having advanced in free, but if we
    # choose a place formed from several freed spans, we have to advance to the
    # max of all spans we end up using, which might advance some of the clocks
    # quite a bit.
    # ...
    # At the moment, tidylst coalesces all versions together.  We should
    # instead hunt for a minimum of the (byte*clock_delta) sum for the places
    # considered.  We can stop early if we find a zero, of course.
    #
    def _alloc_place_helper(self, stk, sz):
        #### Stack flavor:

        fit = self._tlpf.setdefault(stk, SegFreeList()).iterfor(
            sz, 1 << self._alignlog)

        # Walk the free list to see if we have something of the "stack" flavor lying around.
        for (tpos, tsz) in fit:
            apos = self._eva_align_roundup(tpos)
            if (apos == tpos) or (tpos + tsz >= apos + sz):
                # Remove from the freelist; any residual spans will come back to us in a moment
                self._tlpf[stk].remove(tpos)
                return apos

        # OK, that didn't work out.  Start over, go claim something without preference bigger
        # than the allocation we're tasked with, repaint it to have the current preference,
        # and return the base thereof.
        #
        # Align to multiple of page sizes
        sz = self._evp2eva(self._eva2evp_roundup(sz * self._stkfaf))
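        # (With the default --flavor-open-factor of 1024, a 32-byte request
        # opens a page-rounded flavored region of 32 KiB.)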

        for (tpos, tsz) in self._tlpf[None].iterfor(sz, 1 << self._alignlog):
            apos = self._eva_align_roundup(tpos)
            if tpos + tsz >= apos + sz:  # sz already scaled by _stkfaf above
                self._tlpf[None].remove(tpos)
                self._prefs.mark(tpos, tsz, stk)
                return apos

        # OK, OK, we really haven't found anything, even if we're willing to
        # repaint.  Go grab at the wilderness; bump the wilderness pointer now to trigger
        # the common case in _mark_allocated; things will be enqueued on our per-flavor TIDY
        # list using _mark_allocated_residual
        #
        # XXX? Round base up to page boundaries and allocate the whole
        # thing for this flavor.

        # pos = self._eva_align_roundup(self._wildern)
        pos = self._evp2eva(self._eva2evp_roundup(self._wildern))
        self._wildern = pos + sz
        self._prefs.mark(pos, self._wildern - pos, stk)

        # This is kind of gross; this segment is not in any of our flavored free
        # lists, and we don't want to put it there, as our _mark_tidy would do.
        # So instead, we reach up to the superclass to update the segment state
        # and rely on duplicating any other work that our _mark_tidy does
        # (which, thankfully, is currently none)
        super()._mark_tidy(pos, sz)

        return pos

    def _alloc_place(self, stk, sz):
        pos = self._alloc_place_helper(stk, sz)

        #### MTE:

        nv = max(v for (_, __, v) in self._mtags[pos:pos + sz])
        assert nv != self._nvers
        self._mtags.mark(pos, sz, nv)

        return pos

    def _mark_allocated_residual(self, stk, loc, sz, isLeft):
        super()._mark_allocated_residual(stk, loc, sz, isLeft)

        #### Stack flavor:

        # This is a bit of a mess.  The "residual" spans that come back to us
        # are from the generic TIDY list, which coalesces across our preferences.
        # So, only queue the residual bits to the preference-respecting TIDY list
        # if the current allocation stack matches the span's preference, and, only then,
        # up to our preference's boundary.  Discontiguous stk-flavored TIDY spans will
        # already be in the right free list (proof by induction).
        if isLeft:
            (qbase, qsz, qv) = self._prefs[loc + sz - 1]
            if stk == qv:
                base = max(qbase, loc)
                self._tlpf[stk].expunge(base, loc + sz - base)
                self._tlpf[stk].insert(base, loc + sz - base)
        else:
            (qbase, qsz, qv) = self._prefs[loc]
            if stk == qv:
                lim = min(qbase + qsz, loc + sz)
                self._tlpf[stk].expunge(loc, lim - loc)
                self._tlpf[stk].insert(loc, lim - loc)

    def _free(self, stk, tid, loc):
        sz = self._eva2sz[loc]

        (_, __, v) = self._mtags.get(loc)
        if v == self._nvers - 1:
            super(__class__, self)._free(stk, tid, loc)
            self._mtags.mark(loc, sz, self._nvers)
        else:
            super(__class__, self)._free_unsafe(stk, tid, loc)
            self._mtags.mark(loc, sz, v + 1)

    def _mark_tidy(self, loc, sz):
        super()._mark_tidy(loc, sz)

        #### Stack flavor:

        for (tloc, tsz, tv) in self._prefs[loc:loc + sz]:
            nloc = max(tloc, loc)
            nsz = min(loc + sz, tloc + tsz) - nloc
            self._tlpf[tv].insert(nloc, nsz)

    def _mark_revoked(self, loc, sz):
        #### Stack flavor:

        # XXX Should we be preserving preferences?  Always?  Sometimes?

        # Remove each overlapping span from each preferential TIDY list; the parent allocator
        # will take care of managing the global TIDY list.
        for (_, __, pv) in self._prefs[loc:loc + sz]:
            self._tlpf[pv].expunge(loc, sz)

        if sz >= 0:  # XXX
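            # (Note: sz is never negative, so the preference-preserving else
            # branch below is currently unreachable; presumably a size
            # threshold is intended in place of 0.)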
            # Just paint the whole thing as None.
            self._tlpf[None].insert(loc, sz, front=self._revoke_front)
            self._prefs.mark(loc, sz, None)
        else:
            # Preserve preferences and re-queue
            for (qb, qsz, pv) in self._prefs[loc:loc + sz]:
                b = max(qb, loc)
                l = min(qb + qsz, loc + sz)
                self._tlpf[pv].insert(b, l - b, front=self._revoke_front)

        #### MTE:
        self._mtags.mark(loc, sz, 0)

        super()._mark_tidy(loc, sz)

    # Sort by the sum of (sz*version), as that is the value by which we wind
    # back the clock to defer later revocations.
    def _find_clockiest_revokable_spans(self, n=1):
        if n == 0: return []
        if n == 1 and self._brscache is not None:
            return [self._brscache]

        # [(clocksum, njunk, loc, sz)] in ascending order
        bests = [(0, 0, -1, -1)]
        for (qbase, qsz,
             qv) in self._eva2sst.iter_vfilter(None, self._wildern, sst_tj):

            clocksum = 0
            for (vbase, vsz, vv) in self._mtags[qbase:qbase + qsz]:
                # Don't walk off the end of the last segment
                vsz = min(vsz, qbase + qsz - vbase)
                clocksum += vsz * vv

            if clocksum <= bests[0][0]: continue

            # For internal accounting, also accumulate the number of JUNK bytes
            nj = sum([
                sz for (_, sz, v) in self._eva2sst[qbase:qbase + qsz]
                if v == SegSt.JUNK
            ])
            insort(bests, (clocksum, nj, qbase, qsz))

            bests = bests[(-n):]

        return [best[1:] for best in bests if best[2] >= 0]

    def _revoke_iterator(self):
        for (b, s, v) in self._eva2sst.iter_vfilter(
                None, self._wildern,
                sst_tj if self._revoke_all else [SegSt.JUNK]):
            jsum = sum(sz for (_, sz, v) in self._eva2sst[b:b + s]
                       if v == SegSt.JUNK)
            if jsum == 0: continue
            yield (jsum, b, s)

    # Revocation here does not have a fixed number of windows on which it can
    # operate; everything in the junk (and tidy!) lists can be revoked in a
    # single go.  In implementation, this looks like validating that every
    # capability's contained version field matches the version painted in RAM.
    #
    # The limits of the allocator behavior range from being able to actually
    # get all free spans, regardless of their version (JUNK or TIDY), to being
    # able to reclaim just the ones that were already at the maximal version
    # (i.e. JUNK).  In practice, one could imagine the allocator maintaining
    # a "free epoch" bit and reclaiming (i.e., restoring to version 0) all
    # segments whose free epoch predates the current, now-ending sweep.
    #
    def _maybe_revoke(self):
        # Not above ratio threshold
        if self._revoke_jwr is not None \
           and self._njunk < self._revoke_jwr * self._nwait:
            return

        # Not enough JUNK
        if self._njunk < self._revoke_jmin: return

        # Still enough TIDY?
        if self._revoke_tmax is not None and \
           self._wildern - self._nwait \
             - self._njunk - self._evp2eva(self._basepg) \
            > self._revoke_tmax :
            return
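        # (The left-hand side is the TIDY budget: all bytes below the
        # wilderness mark that are neither WAIT nor JUNK.)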

        if self._revoke_k is None:
            it = self._revoke_iterator()
        else:
            # Allocate room for up to _revoke_lru things from the JUNK LRU.
            nlru = min(self._revoke_lru, len(self._junklru))

            # XXX I'd love to, but boy is this a slow way to do it.
            #
            ## # Estimate reclaim quantity
            ## unjunk = self._find_largest_revokable_spans(n=self._revoke_k-nlru)
            ## unjunk = sum(nj for (nj, _, __) in unjunk)
            ## if unjunk < self._revoke_jmin:
            ##   return
            #
            # This is going to be much faster to simulate due to the brscache, though
            # it overestimates the amount we will reclaim from non-LRU spans and gives
            # no credit to LRU spans.
            #
            unjunk = self._find_largest_revokable_spans(n=1)
            if len(unjunk) * (self._revoke_k - nlru) < self._revoke_jmin:
                return

            # Do that again, but as part of the set of things to push to the revoker
            it = (x for x in self._find_largest_revokable_spans(
                n=self._revoke_k - nlru))

            # Add from the LRU JUNK queue
            it = itertools.chain(it, ((jsz, jb, jsz)
                                      for (jb, jsz) in self._junklru))

            # XXX Could also pull from the TIDY queue, but should filter by those
            # that contain nonzero versions or sort by the clockiest span or
            # something.
            #
            # XXX Should also coalesce with TIDY spans for the things we pull from
            # the LRU queue.

            # Limit to the number we actually can run
            # XXX This should deduplicate before slicing.  Sigh.
            #
            it = itertools.islice(it, self._revoke_k)

        rl = list(it)
        # XXX reverse is a hack (lower VA last, fronted on LRU queues?)
        rl.reverse()
        self._do_revoke(rl)
        if __debug__: self._state_asserts()
class Allocator(TraditionalAllocatorBase):

  __slots__ = (
      '_prefs' , # Preferred stack flavor for regions of memory
      '_tlpf'  , # TIDY List Per Flavor
      '_stkfaf', # Stack flavor allocation factor
  )

  # The parent class's _tidylst will continue to be all TIDY spans of any
  # flavor, but we additionally track TIDY-per-flavor in _tlpf, including
  # the unflavored spans at _tlpf[None]

  @staticmethod
  def _init_add_args(argp) :
    super(__class__, __class__)._init_add_args(argp)
    argp.add_argument('--flavor-open-factor', action='store', type=int, default=1024,
                      help="Scale factor when opening a flavored heap region")

  def _init_handle_args(self, args) :
    super(__class__, self)._init_handle_args(args)
    self._stkfaf = args.flavor_open_factor

  def __init__(self, **kwargs) :
    super().__init__(**kwargs)

    # XXX
    self._revoke_k = 8
    self._free = self._free_unsafe

    self._prefs = IntervalMap (
                   self._evp2eva(self._basepg),
                   2**64 - self._evp2eva(self._basepg),
                   None ) 
    self._tlpf = {}
    self._tlpf[None] = SegFreeList()

  def _state_asserts(self):
    super()._state_asserts()

    # For each flavored TIDY list, check that...
    for stk in self._tlpf.keys() :
      # ... The list itself is OK
      self._tlpf[stk].crossreference_asserts()

      # ... and every element ...
      for (pos, sz) in self._tlpf[stk].iterlru() :
        # ... is of the correct flavor
        (qbase, qsz, qv) = self._prefs[pos]
        assert qv == stk, ("Mixed-flavor TIDY", (pos, sz, stk), (qbase, qsz, qv))
        assert qbase + qsz >= pos + sz, ("More preferred TIDY than flavored", (pos, sz, stk), (qbase, qsz))

        # ... is actually tidy
        (qbase, qsz, qv) = self._eva2sst[pos]
        assert qv == SegSt.TIDY, ("Flavored TIDY is not TIDY", (pos, sz, stk), (qbase, qsz, qv))
        assert qbase + qsz >= pos + sz, ("More flavor TIDY than TIDY", (pos, sz, stk), (qbase, qsz),
            self._eva2sst[qbase+qsz], list(self._tlpf[stk].iterlru()))

    # Check that all TIDY segments with flavor are on the appropriate flavored TIDY list
    fif = lambda start : ((loc, sz, v) for (loc, sz, v) in self._prefs[start:])
    tif = lambda start : ((loc, sz, v) for (loc, sz, v) in self._eva2sst[start:] if v == SegSt.TIDY)
    for (loc, sz, (stk, _)) in im_stream_inter(fif, tif) :
        assert self._tlpf[stk].adns.get(loc,None) is not None, \
            ("Unindexed flavored TIDY span", (loc, sz, stk),
                self._prefs[loc], self._eva2sst[loc],
                self._tlpf[stk].adns)

  def _mark_allocated_residual(self, stk, loc, sz, isLeft):
    super()._mark_allocated_residual(stk, loc, sz, isLeft)

    # This is a bit of a mess.  The "residual" spans that come back to us
    # are from the generic TIDY list, which coalesces across our preferences.
    # So, only queue the residual bits to the preference-respecting TIDY list
    # if the current allocation stack matches the span's preference, and, only then,
    # up to our preference's boundary.  Discontiguous stk-flavored TIDY spans will
    # already be in the right free list (proof by induction).
    if isLeft :
      (qbase, qsz, qv) = self._prefs[loc+sz-1]
      if stk == qv :
        base = max(qbase, loc)
        self._tlpf[stk].expunge(base, loc + sz - base)
        self._tlpf[stk].insert(base, loc + sz - base)
    else :
      (qbase, qsz, qv) = self._prefs[loc]
      if stk == qv :
        lim = min(qbase + qsz, loc + sz)
        self._tlpf[stk].expunge(loc, lim - loc)
        self._tlpf[stk].insert(loc, lim - loc)

  def _alloc_place(self, stk, sz) :
    fit = self._tlpf.setdefault(stk, SegFreeList()).iterfor(sz, 1 << self._alignlog) 

    # Walk the free list to see if we have something of the "stack" flavor lying around.
    for (tpos, tsz) in fit :
        apos = self._eva_align_roundup(tpos)
        if (apos == tpos) or (tpos + tsz >= apos + sz) :
          # Remove from the freelist; any residual spans will come back to us in a moment
          self._tlpf[stk].remove(tpos)
          return apos

    # OK, that didn't work out.  Take two, go claim something without preference bigger
    # than the allocation we're tasked with, repaint it to have the current preference,
    # and return the base thereof.

    # Yes, but boy is this a slow way to get it, apparently. :(
    # Now we use per-flavor free lists.
    #
    ## for (tpos, tsz) in self._tidylst.iterfor(self._stkfaf * sz, 1 << self._alignlog) :
    ##     for (ppos, psz, pv) in self._prefs[tpos:tpos+tsz] :
    ##         if pv is not None : continue

    ##         psz = min(tpos + tsz, ppos + psz) - ppos
    ##         ppos = max(tpos, ppos)

    ##         apos = self._eva_align_roundup(ppos)
    ##         if ppos + psz >= apos + self._stkfaf * sz :
    ##           self._prefs.mark(ppos, psz, stk)
    ##           return apos

    for (tpos, tsz) in self._tlpf[None].iterfor(self._stkfaf * sz, 1 << self._alignlog) :
      apos = self._eva_align_roundup(tpos)
      if tpos + tsz >= apos + self._stkfaf * sz :
        self._prefs.mark(tpos, tsz, stk)
        self._tlpf[None].remove(tpos)
        return apos

    # OK, OK, we really haven't found anything, even if we're willing to
    # repaint.  Go grab at the wilderness; bump the wilderness pointer now to trigger
    # the common case in _mark_allocated; things will be enqueued on our per-flavor TIDY
    # list using _mark_allocated_residual
    pos = self._eva_align_roundup(self._wildern)
    self._wildern = pos + self._stkfaf * sz
    self._prefs.mark(pos, self._stkfaf * sz, stk)

    # This is kind of gross; this segment is not in any of our flavored free
    # lists, and we don't want to put it there, as our _mark_tidy would do.
    # So instead, we reach up to the superclass to update the segment state
    # and rely on duplicating any other work that our _mark_tidy does
    # (which, thankfully, is currently none)
    super()._mark_tidy(pos, self._stkfaf * sz)

    return pos

  def _mark_tidy(self, loc, sz) :
    super()._mark_tidy(loc, sz)

    for (tloc, tsz, tv) in self._prefs[loc:loc+sz] :
      nloc = max(tloc, loc)
      nsz = min(loc + sz, tloc + tsz) - nloc
      self._tlpf[tv].insert(nloc, nsz)

  def _mark_revoked(self, loc, sz) :
    # Just paint the whole thing as None, though that's potentially rude to any painted
    # spans on either end. (XXX?)

    # Remove each overlapping span from each preferential TIDY list; the parent allocator
    # will take care of removing it from the global TIDY list.
    for (_, __, pv) in self._prefs[loc:loc+sz] :
      self._tlpf[pv].expunge(loc, sz)

    self._tlpf[None].insert(loc, sz)
    self._prefs.mark(loc, sz, None)

    super()._mark_tidy(loc,sz)

  def _maybe_revoke(self):
    # XXX configurable policy.
    #
    # Should we be looking at both the general state of the heap as well as the occupancies
    # of our preferred regions?

    if self._njunk >= self._nwait and len(self._junklru) >= 16 :
      self._do_revoke_best_and(revoke=[loc for (loc, _) in itertools.islice(self._junklru,8)])
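
    # In other words: once JUNK bytes match or exceed WAIT bytes and at least
    # 16 spans sit on the JUNK LRU, hand the 8 oldest JUNK spans to the revoker
    # (presumably alongside whatever _do_revoke_best_and itself selects as
    # "best").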