Beispiel #1
0
 def items(self, oids=None):
     """
     Return the queued entries as a list sorted by file position.

     Each element is a ``(startpos, endpos, oid_int, prev_tid_int)``
     tuple built from ``self._queue_contents``. When *oids* is not
     None, only entries whose OID is contained in it are included.
     """
     entries = []
     for oid_int, (startpos, endpos, prev_tid_int) in iteroiditems(
             self._queue_contents):
         if oids is not None and oid_int not in oids:
             continue
         entries.append((startpos, endpos, oid_int, prev_tid_int))
     # Each tuple leads with startpos, so a plain sort orders the
     # queue by file position; that should help when a large file
     # has to be read sequentially from disk.
     entries.sort()
     return entries
Beispiel #2
0
    def _find_changes_for_viewer(viewer, object_index):
        """
        Build the iterator of changes that *viewer* has not yet seen,
        based on the freshly polled *object_index*.

        This must run **before** the viewer's MVCC state is updated,
        because the viewer's previous ``highest_visible_tid`` tells us
        how far back the changes have to reach.

        The lock is not required here; the only constraint is that the
        index must not be vacuumed until we are done, since its maps
        may be needed to build the changes.

        Returns None when the viewer has to invalidate everything it
        has cached, otherwise an iterator over the changed items.
        """
        if viewer.highest_visible_tid is None or viewer.detached:
            # The viewer's state is stale and we have most likely lost
            # track of what changed for it. Returning None tells it to
            # drop all of its cached objects. It can keep using our
            # index and poll state from here on; there is no need to
            # look backwards.
            logger.debug(
                "Invalidating all persistent objects for viewer %r (detached? %s)",
                viewer, viewer.detached)
            return None

        # One of the maps in the index carries the highest visible tid
        # of this viewer's previous poll. Every map that precedes it in
        # ``object_index.maps`` holds changes the viewer has not seen.
        # There may be none at all.
        previous_poll_tid = viewer.highest_visible_tid
        unseen_maps = []
        for index_map in object_index.maps:
            if index_map.highest_visible_tid == previous_poll_tid:
                break
            unseen_maps.append(index_map)

        # Apply the collected maps back to front so that, for any given
        # oid, the entry from the map nearest the front of the index is
        # the one that remains.
        # TODO: Except for that 'ignore_tid' passed to the viewer's
        # poll method, we could very efficiently do this with
        # OidTMap_multiunion with one call to C.
        merged = OidTMap()
        for index_map in reversed(unseen_maps):
            merged.update(index_map)

        return iteroiditems(merged)
Beispiel #3
0
    def _poll_delta_after1(self):
        """
        Re-validate every oid currently held in ``self.delta_after1``.

        The current tids for those oids are fetched through the
        adapter's connection manager in a read-only transaction, and
        ``self.delta_after1`` is replaced by a new mapping of the same
        type built from the result. Any oid that is missing from the
        refreshed mapping, or whose tid differs from the one recorded
        before, is added to ``self.polled_invalid_oids``.
        """
        before_poll = self.delta_after1
        oids = list(self.delta_after1)
        # TODO: We have a defined transaction range here that we're concerned
        # about. We might be better off using poller.list_changes(), just like
        # __poll_replace_checkpoints() does.
        logger.debug("Polling %d oids in delta_after1", len(oids))

        def poll_oids_delta1(_conn, cursor):
            return self.adapter.mover.current_object_tids(cursor, oids)

        # The connection manager reads these attributes off the callable.
        connmanager = self.adapter.connmanager
        poll_oids_delta1.transaction_isolation_level = connmanager.isolation_load
        poll_oids_delta1.transaction_read_only = True
        polled_tids = connmanager.open_and_call(poll_oids_delta1)

        self.delta_after1 = type(self.delta_after1)(polled_tids)
        refreshed = self.delta_after1

        stale_oids = set()
        for oid, recorded_tid in iteroiditems(before_poll):
            if oid in refreshed and refreshed[oid] == recorded_tid:
                # Still at the tid we knew about; nothing to invalidate.
                continue
            stale_oids.add(oid)

        self.polled_invalid_oids.update(stale_oids)
        logger.debug("Polled %d oids in delta_after1; %d survived", len(oids),
                     len(oids) - len(stale_oids))
Beispiel #4
0
    def _find_changes_for_viewer(viewer, object_index):
        """
        Build the iterator of changes that *viewer* has not yet seen,
        based on the freshly polled *object_index*.

        This must run **before** the viewer's MVCC state is updated,
        because the viewer's previous ``highest_visible_tid`` tells us
        how far back the changes have to reach.

        The lock is not required here; the only constraint is that the
        index must not be vacuumed until we are done, since it may be
        needed to build the changes.

        Returns None when the viewer has to invalidate everything it
        has cached, otherwise an iterator over the changed items.
        """
        if viewer.highest_visible_tid is not None and not viewer.detached:
            # Somewhere in the index is the map whose highest visible
            # tid matches this viewer's previous poll; the index
            # collects everything after that point, which is what the
            # viewer still needs to see. There may be no changes.
            unseen = object_index.collect_changes_after(
                viewer.highest_visible_tid)
            return iteroiditems(unseen)

        # The viewer's state is stale and we have most likely lost
        # track of what changed for it. Returning None tells it to drop
        # all of its cached objects. It can keep using our index and
        # poll state from here on; there is no need to look backwards.
        logger.debug(
            "Invalidating all persistent objects for viewer %r (detached? %s)",
            viewer, viewer.detached)
        if viewer.detached:
            stats = statsd_client()
            if stats is not None:
                # Always send, not a sample. Should be rare.
                stats.incr(
                    'relstorage.cache.mvcc.invalidate_all_detached', 1, 1)
        return None
Beispiel #5
0
    def __call__(self, checkpoints, row_iter):
        """
        Generate ``(key, value)`` pairs from the rows in *row_iter*.

        Rows are ``(oid, tid, state, tid)`` where both tids are equal;
        the first two items form the key and the last two the value.

        With no *checkpoints*, rows are passed through in that format
        unchanged. Otherwise *checkpoints* is unpacked as ``(cp0, cp1)``
        and each row is classified by its tid:

        - newer than ``cp0``: recorded in ``self.delta_after0`` and
          yielded;
        - newer than ``cp1``: recorded in ``self.delta_after1`` and
          yielded;
        - anything else: held back, verified by a manual poll after all
          rows are consumed, and then yielded under ``(oid, cp0)``.
        """
        if not checkpoints:
            # Nothing to transform; just emit the rows in the expected
            # shape.
            # XXX: Is there really even any reason to return these? We'll
            # probably never generate keys that match them.
            for row in row_iter:
                yield row[:2], row[2:]
            return

        delta_after0 = self.delta_after0
        delta_after1 = self.delta_after1
        cp0, cp1 = checkpoints

        # {oid: (state, actual_tid)}
        # Entries we cannot vouch for yet. They are kept here and
        # resolved with one big query once every row has been seen.
        unverified = OID_OBJECT_MAP_TYPE()

        for row in row_iter:
            key, value = row[:2], row[2:]
            oid = key[0]
            actual_tid = value[1]
            # See __poll_replace_checkpoints() for how the delta maps
            # are built.
            #
            # Changes *after* cp0 get polled for (cp0 becomes our
            # current_tid / the storage's prev_polled_tid) and land in
            # delta_after0. Changes *after* cp1 are not polled for:
            # delta_after1 is only filled when checkpoints shift, on
            # the assumption that anything later is caught by an
            # updated delta_after0.
            #
            # The local database merges data from several sources, so
            # an older cache may have used checkpoints that lag behind
            # the current ones. Entries bound for delta_after1 therefore
            # cannot be trusted without validation. They are still
            # yielded here, but may be removed again afterwards if they
            # turn out to be invalid.
            if actual_tid > cp0:
                target = delta_after0
            elif actual_tid > cp1:
                target = delta_after1
            else:
                # Older than both checkpoints. It may still be worth
                # keeping (perhaps it rarely changes), but it cannot go
                # straight into the fallback keys (oid, cp0) or
                # (oid, cp1): it might not be current, and the storage
                # does not poll that far back.
                #
                # So hold onto it and poll manually later. If it is
                # still valid, good; if not, someone should remove it
                # from the database so we stop re-checking it. Ideally
                # that poll only happens while the cache has room left
                # (rarely a problem: the db write size matches the
                # in-memory size except on the first startup after the
                # in-memory size was reduced).
                unverified[oid] = value
                continue

            target[oid] = actual_tid
            yield key, value

        # Every row has been consumed; validate what needs validating.

        # TODO: Should this be a configurable option, like ZEO's
        # 'drop-rather-invalidate'? So far I haven't seen signs that
        # this will be particularly slow or burdensome.
        self._poll_delta_after1()

        if not unverified:
            return

        self._poll_old_oids_and_remove(unverified)
        # Whatever survived the poll is still at the tid we recorded
        # for it (barring a concurrent transaction changing the object,
        # which should be rare), so it can be stored under the fallback
        # key.
        for oid, value in iteroiditems(unverified):
            yield (oid, cp0), value