Example #1
    def prefetch_for_conflicts(self, cursor, oid_tid_pairs):
        results = {}
        to_fetch = OidTMap()
        cache_get = self.cache.get

        # If we've never polled, we can't actually use our cache; we'll
        # just have to make a bulk query.
        if not self.object_index:
            to_fetch = OidTMap(oid_tid_pairs)
        else:
            for key in oid_tid_pairs:
                # Don't update stats/MRU, just as with normal prefetch().
                # It's also important here to avoid taking the lock.
                # We don't store this prefetched data back into the cache because
                # we're just about to overwrite it; we'd have to have multiple writers
                # all with the same initial starting TID lined up to write to the object
                # for that to have any benefit.
                cache_data = cache_get(key, peek=True)
                if not cache_data:
                    to_fetch[key[0]] = key[1]
                else:
                    results[key[0]] = cache_data
                    assert cache_data[1] == key[1]

        if to_fetch:
            check = (self._check_tid_after_load if self.object_index
                     else lambda *_args, **_kwargs: None)
            for oid, state, tid_int in self.adapter.mover.load_currents(
                    cursor, to_fetch):
                check(oid, tid_int, to_fetch[oid], cursor=cursor)
                results[oid] = (state, tid_int)

        return results
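
The pattern above — peek at the local cache without touching stats, MRU, or locks, then make one bulk query for the misses — can be sketched standalone. Everything here (`cache` as a plain dict keyed by `(oid, tid)`, the `load_currents` callable) is a hypothetical stand-in for the RelStorage types, not their real API:

    def prefetch_for_conflicts_sketch(cache, load_currents, oid_tid_pairs):
        """Serve (state, tid) pairs from *cache*; bulk-load only the misses."""
        results = {}
        to_fetch = {}  # oid -> the tid we expect to see
        for oid, tid in oid_tid_pairs:
            cached = cache.get((oid, tid))  # a peek: no stats, no MRU, no lock
            if cached is None:
                to_fetch[oid] = tid
            else:
                results[oid] = cached
        if to_fetch:
            # One round trip for everything the cache couldn't answer.
            for oid, state, tid in load_currents(to_fetch):
                results[oid] = (state, tid)
        return results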
Example #2
    def _find_changes_for_viewer(viewer, object_index):
        """
        Given a freshly polled *object_index*, and the *viewer* that polled
        for it, build a changes iterator.

        Call this **before** updating the viewer's MVCC state, so that
        we know how far back we need to build the changes.

        Does not need to hold the lock, except that the index cannot be
        vacuumed until this process is complete (since we may need that for
        building changes).
        """
        if viewer.highest_visible_tid is None or viewer.detached:
            # Snarf. Old state, and we probably lost track of changes.
            # Whelp, it needs to invalidate all its cached objects (so
            # we must return None), but it can still use our index and
            # poll state going forward; we don't need to go backwards.
            logger.debug(
                "Invalidating all persistent objects for viewer %r (detached? %s)",
                viewer, viewer.detached)
            return None

        # Somewhere in the index is a map with the highest visible tid
        # matching the last time this viewer polled. Everything from there
        # forward is a change. Note that there may be no changes.
        last_poll_time = viewer.highest_visible_tid
        changes = OidTMap()
        change_dicts = []
        for m in object_index.maps:
            if m.highest_visible_tid == last_poll_time:
                break
            change_dicts.append(m)

        while change_dicts:
            # In reverse order, capturing only the most recent change.
            # TODO: Except for that 'ignore_tid' passed to the viewer's
            # poll method, we could very efficiently do this with
            # OidTMap_multiunion with one call to C.
            changes.update(change_dicts.pop())

        return iteroiditems(changes)
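
As a standalone illustration of the merge step, here is a sketch over plain dicts. `maps` is assumed newest-first, each entry carrying a `highest_visible_tid` and a `changes` dict; the for/else guard for a missing poll point is the sketch's own addition, since the real method relies on the `detached` check above:

    def changes_since_sketch(maps, last_poll_tid):
        """*maps* is newest-first; merge every map newer than *last_poll_tid*."""
        pending = []
        for m in maps:
            if m['highest_visible_tid'] == last_poll_tid:
                break
            pending.append(m['changes'])
        else:
            return None  # Poll point fell off the index; invalidate everything.
        merged = {}
        while pending:
            # pending is newest-first; pop() applies the oldest first, so
            # newer maps overwrite and only the most recent change survives.
            merged.update(pending.pop())
        return iter(merged.items())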
Example #3
    def _vacuum(self, cache, object_index):
        """
        Handle object index and cache entries for which we no longer
        have a requirement.

        Named for the ``VACUUM`` command and process in PostgreSQL,
        this notices when we are keeping index data for transactions
        that are no longer needed.

        When this function is called, the cache doing the polling
        should have been updated with the new object index (and thus
        ``highest_visible_tid``), and thus released its claim to its
        former TID.

        Usually, ``object_index`` will be the same as ``self.object_index``
        but that's not required to enable vacuuming of divergent indices
        in concurrent situations.

        If the given ``object_index`` has a ``minimum_highest_visible_tid``
        (i.e., the oldest polled transaction) that is now less than
        the oldest polled transaction needed by any extant cache
        registered to this coordinator, we are free to vacuum that
        oldest state.

        Usually, the next state's ``minimum_highest_visible_tid`` will
        match exactly our new required minimum, but in case of divergent
        indices, there may be a gap. We do not want to vacuum in that case
        because, even though the viewers with the partially diverged index
        will still be able to read just fine, we may prematurely remove
        cached object states that they need.
        """
        # This is called for every transaction. It needs to be fast, and mindful
        # of what it logs.
        #
        # MVCC can easily develop "gaps", where one lone reader is at
        # the back and all the other readers are up front somewhere,
        # with that chain of maps in between doing no one any good. We
        # should try to squash those gaps.

        # We partly deal with that by deciding to cut off the oldest viewers
        # (probably idle connections sitting in the pool) if the total depth
        # gets too deep, or the total size gets too large. Do that first.
        if (object_index.depth > self.max_allowed_index_depth
                or object_index.total_size > self.max_allowed_index_size):
            self.detach_viewers_at_minimum()

        required_tid = self.minimum_highest_visible_tid  # This won't change during our invocation
        local_client = cache.local_client
        self.log(
            TRACE,
            "Attempting vacuum from %s up to %s",
            object_index.minimum_highest_visible_tid,
            required_tid,
        )
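        # Deletions accumulate across every bucket we vacuum below and are
        # issued against the cache in one call at the end.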
        oids_tids_to_del = OidTMap()
        while 1:
            if object_index.depth == 1:
                # Nothing left to vacuum
                break
            if object_index.minimum_highest_visible_tid == required_tid:
                # Still need this history.
                break
            next_state = object_index.maps[-2]
            if required_tid and next_state.highest_visible_tid > required_tid:
                # The last state isn't quite obsolete, others are still looking
                # at that range. Don't remove it.
                break

            # All remaining valid viewers have highest_visible_tid > this one,
            # so any OID that exists in both this bucket and a newer bucket
            # with a newer TID can be purged from the local cache, because
            # it has been changed.
            obsolete_bucket = object_index.maps.pop()
            # Immediately also mark it as closed before we start mutating its
            # contents. No more storing to this one!
            obsolete_bucket.accepts_writes = False
            newer_oids = object_index.keys()
            in_both = OidTMap_intersection(newer_oids, obsolete_bucket)

            self.log(TRACE,
                     "Examining %d old OIDs to see if they've been replaced",
                     len(in_both))

            for oid in in_both:
                old_tid = obsolete_bucket[oid]
                newer_tid = object_index[oid]
                # We intersected, so we're sure that both are present.
                if newer_tid != old_tid:
                    # Note that even though we're removing data from
                    # this bucket that might be in the range that it
                    # claims to have complete index data for, that's
                    # fine: The end result when we put everything back
                    # together is still going to be complete index
                    # data, because the object changed in the future.
                    # This particular transaction chunk won't be complete, but
                    # it's inaccessible.
                    # This is where we should hook in the 'invalidation' tracing.
                    del obsolete_bucket[oid]
                    oids_tids_to_del[oid] = old_tid  # These will just keep going up.
                    # If we have a shared memcache, we can't be sure everyone
                    # else is done with this key, so we just leave it alone.

            # Now at this point, the obsolete_bucket contains data that we know is
            # either not present in a future map, or is present with exactly the
            # same value. Therefore, at least until someone changes it again,
            # we can consider this data to be frozen. We'll make each cached
            # value available at a special key. There's nothing useful left
            # in this last bucket, so we can throw it away. Note that
            # we do *not* remove the index entries; they're needed to keep
            # the CST in sync for newer transactions that might still be open.
            if obsolete_bucket:
                self.log(TRACE, "Vacuum: Freezing %s old OIDs",
                         len(obsolete_bucket))
                local_client.freeze(obsolete_bucket)

        if oids_tids_to_del:
            local_client.delitems(oids_tids_to_del)
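
Stripped of the TID-based stopping conditions and the index bookkeeping, the core of that loop — pop the oldest bucket, purge whatever a newer bucket has replaced, freeze the rest — might look like this sketch over plain dicts; `freeze` and `delitems` stand in for the local-client calls:

    def vacuum_oldest_buckets_sketch(buckets, freeze, delitems):
        """*buckets* is newest-first; vacuum from the oldest end."""
        stale = {}  # oid -> old tid, purged from the cache at the end
        while len(buckets) > 1:
            obsolete = buckets.pop()  # the oldest bucket
            newer = {}
            for b in reversed(buckets):  # oldest-to-newest, so newest wins
                newer.update(b)
            for oid in set(obsolete) & set(newer):
                if newer[oid] != obsolete[oid]:
                    # Changed later: the old state can never be read again.
                    stale[oid] = obsolete.pop(oid)
            if obsolete:
                # What's left is identical in every newer view: frozen.
                freeze(obsolete)
        if stale:
            delitems(stale)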
Example #4
    def write_to_sqlite(self, connection, checkpoints, object_index=None):
        # pylint:disable=too-many-locals
        mem_before = get_memory_usage()
        object_index = object_index or OidTMap()

        cur = connection.cursor()

        db = Database.from_connection(connection)
        begin = time.time()

        # In a large benchmark, store_temp() accounts for 32%
        # of the total time, while move_from_temp accounts for 49%.

        # The batch size depends on how many params a stored proc can
        # have; if we go too big we get OperationalError: too many SQL
        # variables. The default is 999.
        # Note that the multiple-value syntax was added in
        # 3.7.11, 2012-03-20.
        with _timer() as batch_timer:
            cur.execute('BEGIN')
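            # Snapshot the oid -> tid mapping already stored in the database.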
            stored_oid_tid = db.oid_to_tid
            fetch_current = time.time()
            count_written, _ = db.store_temp(
                self._items_to_write(stored_oid_tid))
            cur.execute("COMMIT")

        # Take out (reserved write) locks now; if we don't, we can get
        # 'OperationalError: database is locked' (the SQLITE_BUSY
        # error code; the SQLITE_LOCKED error code is about table
        # locks and has the string 'database table is locked'). But
        # BEGIN IMMEDIATE lets us stand in line.
        #
        # Note that the SQLITE_BUSY error on COMMIT happens "if another
        # thread or process has a shared lock on the database
        # that prevented the database from being updated." But "When
        # COMMIT fails in this way, the transaction remains active and
        # the COMMIT can be retried later after the reader has had a
        # chance to clear." So we could retry the commit a couple of
        # times and give up if we can't get there. It looks like one
        # production database takes about 2.5s to execute this entire function;
        # it seems like most instances that are shutting down get to this place
        # at roughly the same time and stack up waiting:
        #
        # Instance | Save Time | write_to_sqlite time
        #     1    |   2.36s   |   2.35s
        #     2    |   5.31s   |   5.30s
        #     3    |   6.51s   |   6.30s
        #     4    |   7.91s   |   7.90s
        #
        # That either suggests that all waiting happens just to acquire this lock and
        # commit this transaction (and that subsequent operations are inconsequential
        # in terms of time) OR that the exclusive lock isn't truly getting dropped,
        # OR that some function in subsequent processes is taking longer and longer.
        # And indeed, our logging showed that a gc.collect() at the end of this function was
        # taking more and more time:
        #
        # Instance | GC Time  | Reported memory usage after GC
        #    1     |   2.00s  |     34.4MB
        #    2     |   3.50s  |     83.2MB
        #    3     |   4.94s  |    125.9MB
        #    4     |   6.44s  |    202.1MB
        #
        # The exclusive lock time did vary, but not as much; trim time
        # didn't really vary:
        #
        # Instance |  Exclusive Write Time | Batch Insert Time | Fetch Current
        #    1     |   0.09s               |   0.12s           |   0.008s
        #    2     |   1.19s               |   0.44s           |   0.008s
        #    3     |   0.91s               |   0.43s           |   0.026s
        #    4     |   0.92s               |   0.34s           |   0.026s
        with _timer() as exclusive_timer:
            cur.execute('BEGIN IMMEDIATE')
            # During the time it took for us to commit, and the time it took
            # to get the lock, it's possible that someone else committed and
            # changed the data in object_state. Things might have changed in
            # the database since the snapshot in which we stored our temp
            # objects, so we can't rely on just assuming that's still the truth.
            rows_inserted = db.move_from_temp()
            if checkpoints:
                db.update_checkpoints(*checkpoints)

            cur.execute('COMMIT')
        # TODO: Maybe use BTrees.family.intersection to get the common keys?
        # Or maybe use a SQLite Python function?
        # 'DELETE from object_state where is_stale(zoid, tid)'
        # What's fastest? The custom function version would require scanning the entire
        # table; unfortunately this sqlite interface doesn't allow defining virtual tables.
        with _timer() as trim_timer:
            # Delete anything we still know is stale, because we
            # saw a new TID for it, but we had nothing to replace it with.
            min_allowed_writeback = OidTMap()
            db.trim_to_size(self.limit, min_allowed_writeback)
            del min_allowed_writeback

        # Cleanups
        cur.close()
        del cur
        db.close()  # closes the connection.
        del db
        del stored_oid_tid

        # We're probably shutting down; don't perform a GC, since we've
        # seen that it can get quite lengthy.

        end = time.time()
        stats = self.stats()
        mem_after = get_memory_usage()
        logger.info(
            "Wrote %d items to %s in %s "
            "(%s to fetch current, %s to insert batch (%d rows), %s to write, %s to trim). "
            "Used %s memory. (Storage: %s) "
            "Total hits %s; misses %s; ratio %s (stores: %s)", count_written,
            connection, end - begin, fetch_current - begin,
            batch_timer.duration, rows_inserted,
            exclusive_timer.duration, trim_timer.duration,
            byte_display(mem_after - mem_before), self._cache, stats['hits'],
            stats['misses'], stats['ratio'], stats['sets'])

        return count_written
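
The BEGIN IMMEDIATE / retry-on-SQLITE_BUSY discipline described in the comments can be shown with nothing but the standard library. This assumes a connection opened with `isolation_level=None` so that explicit BEGIN statements work; the retry policy is the sketch's own, not something this module implements:

    import sqlite3
    import time

    def commit_with_write_lock(connection, work, retries=3, backoff=0.1):
        """Take the reserved write lock up front; retry if the database is busy."""
        for attempt in range(retries + 1):
            try:
                # BEGIN IMMEDIATE stands in line for the write lock now,
                # instead of failing later at COMMIT.
                connection.execute('BEGIN IMMEDIATE')
                work(connection)
                connection.execute('COMMIT')
                return
            except sqlite3.OperationalError as ex:
                # 'database is locked' is the SQLITE_BUSY condition above.
                connection.rollback()
                if 'locked' not in str(ex) or attempt == retries:
                    raise
                time.sleep(backoff * (attempt + 1))

Called as, say, `commit_with_write_lock(conn, lambda c: c.execute('UPDATE ...'))`, the caller's work runs only once the reserved lock is held, so COMMIT itself should no longer fail with SQLITE_BUSY.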