Example #1
    def move_from_temp(self, cursor, tid, txn_has_blobs):
        """Moved the temporarily stored objects to permanent storage.

        Returns the list of oids stored.
        """

        if self.keep_history:
            stmt = self._move_from_temp_hp_insert_query
            cursor.execute(stmt, (tid,))
        else:
            self._move_from_temp_object_state(cursor, tid)

            if txn_has_blobs:
                stmt = """
                DELETE FROM blob_chunk
                WHERE zoid IN (SELECT zoid FROM temp_store)
                """
                cursor.execute(stmt)

        if txn_has_blobs:
            stmt = self._move_from_temp_copy_blob_query
            cursor.execute(stmt, (tid,))

        stmt = """
        SELECT zoid FROM temp_store
        """
        cursor.execute(stmt)
        return [oid for (oid,) in fetchmany(cursor)]
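
A note on the fetchmany(cursor) helper that every example in this listing calls: its source is not shown here. A minimal sketch of what such a helper plausibly does, assuming it simply drains a DB-API cursor in chunks (the batch size is an assumption):

def fetchmany(cursor, batch_size=1024):
    """Yield every row from a DB-API cursor, fetching in batches.

    Hypothetical sketch; the real helper may differ.
    """
    while True:
        rows = cursor.fetchmany(batch_size)
        if not rows:
            break
        for row in rows:
            yield row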
Example #2
    def _add_refs_for_oids(self, cursor, oids, get_references):
        """Fill object_refs with the states for some objects.

        Returns the number of references added.
        """
        oid_list = ','.join(str(oid) for oid in oids)
        use_base64 = (self.database_type == 'postgresql')

        if use_base64:
            stmt = """
            SELECT zoid, tid, encode(state, 'base64')
            FROM object_state
            WHERE zoid IN (%s)
            """ % oid_list
        else:
            stmt = """
            SELECT zoid, tid, state
            FROM object_state
            WHERE zoid IN (%s)
            """ % oid_list
        self.runner.run_script_stmt(cursor, stmt)

        add_objects = []
        add_refs = []
        for from_oid, tid, state in fetchmany(cursor):
            if hasattr(state, 'read'):
                # Oracle
                state = state.read()
            add_objects.append((from_oid, tid))
            if state:
                state = decode_bytes_param(state, use_base64)
                try:
                    to_oids = get_references(state)
                except Exception:
                    log.error("pre_pack: can't unpickle "
                        "object %d in transaction %d; state length = %d",
                        from_oid, tid, len(state))
                    raise
                for to_oid in to_oids:
                    add_refs.append((from_oid, tid, to_oid))

        if not add_objects:
            return 0

        stmt = "DELETE FROM object_refs_added WHERE zoid IN (%s)" % oid_list
        self.runner.run_script_stmt(cursor, stmt)
        stmt = "DELETE FROM object_ref WHERE zoid IN (%s)" % oid_list
        self.runner.run_script_stmt(cursor, stmt)

        stmt = """
        INSERT INTO object_ref (zoid, tid, to_zoid) VALUES (%s, %s, %s)
        """
        self.runner.run_many(cursor, stmt, add_refs)

        stmt = """
        INSERT INTO object_refs_added (zoid, tid) VALUES (%s, %s)
        """
        self.runner.run_many(cursor, stmt, add_objects)

        return len(add_refs)
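
decode_bytes_param is not shown in this listing; judging from the call site, it reverses the encode(state, 'base64') applied in the PostgreSQL branch. A hedged sketch under that assumption:

from base64 import b64decode

def decode_bytes_param(value, use_base64):
    """Assumed behavior: undo the base64 encoding used for PostgreSQL."""
    if use_base64:
        if isinstance(value, str):  # some drivers return text for encode()
            value = value.encode('ascii')
        return b64decode(value)
    return value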
Example #3
    def _add_refs_for_tid(self, cursor, tid, get_references):
        """Fill object_refs with all states for a transaction.

        Returns the number of references added.
        """
        log.debug("pre_pack: transaction %d: computing references ", tid)
        from_count = 0
        stmt = """
            SELECT zoid, state
            FROM object_state
            WHERE tid = %(tid)s
            """
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        add_rows = []  # [(from_oid, tid, to_oid)]
        for from_oid, state in fetchmany(cursor):
            state = db_binary_to_bytes(state)
            if hasattr(state, 'read'):
                # Oracle
                state = state.read()
            if state:
                assert isinstance(state, bytes), type(state)  # PY3: used to be str(state)
                from_count += 1
                try:
                    to_oids = get_references(state)
                except Exception:
                    log.error(
                        "pre_pack: can't unpickle "
                        "object %d in transaction %d; state length = %d",
                        from_oid, tid, len(state))
                    raise
                for to_oid in to_oids:
                    add_rows.append((from_oid, tid, to_oid))

        # A previous pre-pack may have been interrupted.  Delete rows
        # from the interrupted attempt.
        stmt = "DELETE FROM object_ref WHERE tid = %(tid)s"
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        # Add the new references.
        stmt = """
        INSERT INTO object_ref (zoid, tid, to_zoid)
        VALUES (%s, %s, %s)
        """
        self.runner.run_many(cursor, stmt, add_rows)

        # The references have been computed for this transaction.
        stmt = """
        INSERT INTO object_refs_added (tid)
        VALUES (%(tid)s)
        """
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        to_count = len(add_rows)
        log.debug(
            "pre_pack: transaction %d: has %d reference(s) "
            "from %d object(s)", tid, to_count, from_count)
        return to_count
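
The get_references callable is supplied by the caller. ZODB ships a reference extractor with a compatible contract; a plausible implementation (an assumption, since the caller may extract references differently):

from ZODB.serialize import referencesf
from ZODB.utils import u64

def get_references(state):
    """Return the integer OIDs referenced by a pickled object state."""
    # referencesf yields the referenced OIDs as 8-byte strings; the
    # tables store them as 64-bit integers, so convert with u64.
    return {u64(oid) for oid in referencesf(state)}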
Example #4
    def fill_object_refs(self, conn, cursor, get_references):
        """Update the object_refs table by analyzing new object states.

        Note that ZODB connections can change the object states while this
        method is running, possibly obscuring object references,
        so this method runs repeatedly until it detects no changes between
        two passes.
        """
        holding_commit = False
        attempt = 0
        while True:
            attempt += 1
            if attempt >= 3 and not holding_commit:
                # Starting with the third attempt, hold the commit lock
                # to prevent changes.
                holding_commit = True
                self.locker.hold_commit_lock(cursor)

            stmt = """
            SELECT object_state.zoid FROM object_state
                LEFT JOIN object_refs_added
                    ON (object_state.zoid = object_refs_added.zoid)
            WHERE object_refs_added.tid IS NULL
                OR object_refs_added.tid != object_state.tid
            ORDER BY object_state.zoid
            """
            self.runner.run_script_stmt(cursor, stmt)
            oids = [oid for (oid, ) in fetchmany(cursor)]
            log_at = time.time() + 60
            if oids:
                if attempt == 1:
                    self.on_filling_object_refs()
                oid_count = len(oids)
                oids_done = 0
                log.info("pre_pack: analyzing references from %d object(s)",
                         oid_count)
                while oids:
                    batch = oids[:100]
                    oids = oids[100:]
                    self._add_refs_for_oids(cursor, batch, get_references)
                    oids_done += len(batch)
                    now = time.time()
                    if now >= log_at:
                        # Save the work done so far.
                        conn.commit()
                        log_at = now + 60
                        log.info("pre_pack: objects analyzed: %d/%d",
                                 oids_done, oid_count)
                conn.commit()
                log.info("pre_pack: objects analyzed: %d/%d", oids_done,
                         oid_count)
            else:
                # No changes since last pass.
                break
        if holding_commit:
            self.locker.release_commit_lock(cursor)
            log.info("attempts: %d - lock released", attempt)
Example #5
    def _add_refs_for_tid(self, cursor, tid, get_references):
        """Fill object_refs with all states for a transaction.

        Returns the number of references added.
        """
        log.debug("pre_pack: transaction %d: computing references ", tid)
        from_count = 0
        stmt = """
            SELECT zoid, state
            FROM object_state
            WHERE tid = %(tid)s
            """
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        add_rows = []  # [(from_oid, tid, to_oid)]
        for from_oid, state in fetchmany(cursor):
            state = db_binary_to_bytes(state)
            if hasattr(state, 'read'):
                # Oracle
                state = state.read()
            if state:
                assert isinstance(state, bytes), type(state) # PY3: used to be str(state)
                from_count += 1
                try:
                    to_oids = get_references(state)
                except Exception:
                    log.error(
                        "pre_pack: can't unpickle "
                        "object %d in transaction %d; state length = %d",
                        from_oid, tid, len(state))
                    raise
                for to_oid in to_oids:
                    add_rows.append((from_oid, tid, to_oid))

        # A previous pre-pack may have been interrupted.  Delete rows
        # from the interrupted attempt.
        stmt = "DELETE FROM object_ref WHERE tid = %(tid)s"
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        # Add the new references.
        stmt = """
        INSERT INTO object_ref (zoid, tid, to_zoid)
        VALUES (%s, %s, %s)
        """
        self.runner.run_many(cursor, stmt, add_rows)

        # The references have been computed for this transaction.
        stmt = """
        INSERT INTO object_refs_added (tid)
        VALUES (%(tid)s)
        """
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        to_count = len(add_rows)
        log.debug("pre_pack: transaction %d: has %d reference(s) "
            "from %d object(s)", tid, to_count, from_count)
        return to_count
Example #6
    def fill_object_refs(self, conn, cursor, get_references):
        """Update the object_refs table by analyzing new object states.

        Note that ZODB connections can change the object states while this
        method is running, possibly obscuring object references,
        so this method runs repeatedly until it detects no changes between
        two passes.
        """
        holding_commit = False
        attempt = 0
        while True:
            attempt += 1
            if attempt >= 3 and not holding_commit:
                # Starting with the third attempt, hold the commit lock
                # to prevent changes.
                holding_commit = True
                self.locker.hold_commit_lock(cursor)

            stmt = """
            SELECT object_state.zoid FROM object_state
                LEFT JOIN object_refs_added
                    ON (object_state.zoid = object_refs_added.zoid)
            WHERE object_refs_added.tid IS NULL
                OR object_refs_added.tid != object_state.tid
            ORDER BY object_state.zoid
            """
            self.runner.run_script_stmt(cursor, stmt)
            oids = [oid for (oid,) in fetchmany(cursor)]
            log_at = time.time() + 60
            if oids:
                if attempt == 1:
                    self.on_filling_object_refs()
                oid_count = len(oids)
                oids_done = 0
                log.info(
                    "pre_pack: analyzing references from %d object(s)",
                    oid_count)
                while oids:
                    batch = oids[:100]
                    oids = oids[100:]
                    self._add_refs_for_oids(cursor, batch, get_references)
                    oids_done += len(batch)
                    now = time.time()
                    if now >= log_at:
                        # Save the work done so far.
                        conn.commit()
                        log_at = now + 60
                        log.info(
                            "pre_pack: objects analyzed: %d/%d",
                            oids_done, oid_count)
                conn.commit()
                log.info(
                    "pre_pack: objects analyzed: %d/%d", oids_done, oid_count)
            else:
                # No changes since last pass.
                break
Example #7
    def _pack_transaction(self, cursor, pack_tid, tid, packed, has_removable,
                          packed_list):
        """Pack one transaction.  Requires populated pack tables."""
        log.debug("pack: transaction %d: packing", tid)
        removed_objects = 0
        removed_states = 0

        if has_removable:
            stmt = self._script_pack_current_object
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            removed_objects = cursor.rowcount

            stmt = self._script_pack_object_state
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            removed_states = cursor.rowcount

            # Terminate prev_tid chains
            stmt = """
            UPDATE object_state SET prev_tid = 0
            WHERE prev_tid = %(tid)s
                AND tid <= %(pack_tid)s
            """
            self.runner.run_script_stmt(cursor, stmt, {
                'pack_tid': pack_tid,
                'tid': tid
            })

            stmt = """
            SELECT pack_state.zoid
            FROM pack_state
            WHERE pack_state.tid = %(tid)s
            """
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            for (oid, ) in fetchmany(cursor):
                packed_list.append((oid, tid))

        # Find out whether the transaction is empty
        stmt = self._script_transaction_has_data
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
        empty = not list(cursor)

        # mark the transaction packed and possibly empty
        if empty:
            clause = 'empty = %(TRUE)s'
            state = 'empty'
        else:
            clause = 'empty = %(FALSE)s'
            state = 'not empty'
        stmt = "UPDATE transaction SET packed = %(TRUE)s, " + clause
        stmt += " WHERE tid = %(tid)s"
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        log.debug(
            "pack: transaction %d (%s): removed %d object(s) and %d state(s)",
            tid, state, removed_objects, removed_states)
Example #8
    def _pack_transaction(self, cursor, pack_tid, tid, packed,
            has_removable, packed_list):
        """Pack one transaction.  Requires populated pack tables."""
        log.debug("pack: transaction %d: packing", tid)
        removed_objects = 0
        removed_states = 0

        if has_removable:
            stmt = self._script_pack_current_object
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            removed_objects = cursor.rowcount

            stmt = self._script_pack_object_state
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            removed_states = cursor.rowcount

            # Terminate prev_tid chains
            stmt = """
            UPDATE object_state SET prev_tid = 0
            WHERE prev_tid = %(tid)s
                AND tid <= %(pack_tid)s
            """
            self.runner.run_script_stmt(cursor, stmt,
                {'pack_tid': pack_tid, 'tid': tid})

            stmt = """
            SELECT pack_state.zoid
            FROM pack_state
            WHERE pack_state.tid = %(tid)s
            """
            self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
            for (oid,) in fetchmany(cursor):
                packed_list.append((oid, tid))

        # Find out whether the transaction is empty
        stmt = self._script_transaction_has_data
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
        empty = not list(cursor)

        # mark the transaction packed and possibly empty
        if empty:
            clause = 'empty = %(TRUE)s'
            state = 'empty'
        else:
            clause = 'empty = %(FALSE)s'
            state = 'not empty'
        stmt = "UPDATE transaction SET packed = %(TRUE)s, " + clause
        stmt += " WHERE tid = %(tid)s"
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

        log.debug(
            "pack: transaction %d (%s): removed %d object(s) and %d state(s)",
            tid, state, removed_objects, removed_states)
Example #9
 def current_object_tids(self, cursor, oids):
     """Returns the current {oid: tid} for specified object ids."""
     res = {}
     _stmt = self._current_object_tids_query
     oids = list(oids)
     while oids:
         # XXX: Dangerous (SQL injection)! And probably slow. Can we do better?
         oid_list = ','.join(str(oid) for oid in oids[:1000])
         del oids[:1000]
         stmt = _stmt % (oid_list,)
         cursor.execute(stmt)
         for oid, tid in fetchmany(cursor):
             res[oid] = tid
     return res
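
The XXX comment above asks whether the string-built IN list can be improved. The values are integers produced by str(), which limits the injection risk, but a bound-parameter variant is easy to write. A sketch, assuming a format-paramstyle driver such as psycopg2 or MySQLdb and hardcoding the table for brevity:

    def current_object_tids_params(self, cursor, oids):
        """Variant of the method above using bound parameters."""
        res = {}
        oids = list(oids)
        while oids:
            batch, oids = oids[:1000], oids[1000:]
            placeholders = ','.join(['%s'] * len(batch))
            stmt = ("SELECT zoid, tid FROM object_state "
                    "WHERE zoid IN (%s)" % placeholders)
            cursor.execute(stmt, batch)
            for oid, tid in fetchmany(cursor):
                res[oid] = tid
        return res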
Example #10
 def generic_current_object_tids(self, cursor, oids):
     """Returns the current {oid: tid} for specified object ids."""
     res = {}
     if self.keep_history:
         table = 'current_object'
     else:
         table = 'object_state'
     oids = list(oids)
     while oids:
         oid_list = ','.join(str(oid) for oid in oids[:1000])
         del oids[:1000]
         stmt = "SELECT zoid, tid FROM %s WHERE zoid IN (%s)" % (table,
                                                                 oid_list)
         cursor.execute(stmt)
         for oid, tid in fetchmany(cursor):
             res[oid] = tid
     return res
Example #11
 def generic_current_object_tids(self, cursor, oids):
     """Returns the current {oid: tid} for specified object ids."""
     res = {}
     if self.keep_history:
         table = 'current_object'
     else:
         table = 'object_state'
     oids = list(oids)
     while oids:
         oid_list = ','.join(str(oid) for oid in oids[:1000])
         del oids[:1000]
         stmt = "SELECT zoid, tid FROM %s WHERE zoid IN (%s)" % (
             table, oid_list)
         cursor.execute(stmt)
         for oid, tid in fetchmany(cursor):
             res[oid] = tid
     return res
Example #12
    def _traverse_graph(self, cursor):
        """Visit the entire object graph to find out what should be kept.

        Sets the pack_object.keep flags.
        """
        log.info("pre_pack: downloading pack_object and object_ref.")

        # Note: TreeSet can be updated at random much faster than Set,
        # but TreeSet consumes more memory. (Random TreeSet updates are
        # probably O(log n) while random Set updates are probably O(n).
        # OTOH, adding to Sets or TreeSets in order is an O(1) operation.)
        Set = BTrees.family64.II.Set
        TreeSet = BTrees.family64.II.TreeSet
        Bucket = BTrees.family64.IO.Bucket
        set_difference = BTrees.family64.II.difference

        # Download the list of root objects to keep from pack_object.
        keep_set = TreeSet()
        stmt = """
        SELECT zoid
        FROM pack_object
        WHERE keep = %(TRUE)s
        """
        self.runner.run_script_stmt(cursor, stmt)
        for from_oid, in fetchmany(cursor):
            keep_set.insert(from_oid)

        # Download the list of object references into all_refs.
        all_refs = Bucket()  # {from_oid: set([to_oid])}
        # Note the Oracle optimizer hints in the following statement; MySQL
        # and PostgreSQL ignore these. Oracle fails to notice that pack_object
        # is now filled and chooses the wrong execution plan, completely
        # killing this query on large RelStorage databases, unless these hints
        # are included.
        stmt = """
        SELECT
            /*+ FULL(object_ref) */
            /*+ FULL(pack_object) */
            object_ref.zoid, object_ref.to_zoid
        FROM object_ref
            JOIN pack_object ON (object_ref.zoid = pack_object.zoid)
        WHERE object_ref.tid >= pack_object.keep_tid
        ORDER BY object_ref.zoid, object_ref.to_zoid
        """
        # While downloading the OIDs, move them to Set and Bucket
        # objects. A Set takes a lot less RAM than Python integer sets.

        # Grouped by object_ref.zoid, store all object_ref.to_zoid in sets.
        self.runner.run_script_stmt(cursor, stmt)
        for from_oid, rows in groupby(fetchmany(cursor), itemgetter(0)):
            d = all_refs.get(from_oid)
            if d is None:
                all_refs[from_oid] = d = Set()
            d.update(row[1] for row in rows)

        # Traverse the object graph.  Add all of the reachable OIDs
        # to keep_set.
        log.info("pre_pack: traversing the object graph "
                 "to find reachable objects.")
        parents = Set(keep_set)
        pass_num = 0
        while parents:
            pass_num += 1
            children = TreeSet()
            for parent in parents:
                to_oids = all_refs.get(parent)
                if to_oids:
                    children.update(to_oids)
            parents = set_difference(children, keep_set)
            keep_set.update(parents)
            log.debug("pre_pack: found %d more referenced object(s) in "
                      "pass %d", len(parents), pass_num)

        # Set pack_object.keep for all OIDs in keep_set.
        del all_refs  # Free some RAM
        log.info("pre_pack: marking objects reachable: %d", len(keep_set))
        batch = []

        def upload_batch():
            oids_str = ','.join(str(oid) for oid in batch)
            del batch[:]
            stmt = """
            UPDATE pack_object SET keep = %%(TRUE)s, visited = %%(TRUE)s
            WHERE zoid IN (%s)
            """ % oids_str
            self.runner.run_script_stmt(cursor, stmt)

        for oid in keep_set:
            batch.append(oid)
            if len(batch) >= 1000:
                upload_batch()
        if batch:
            upload_batch()
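
For readers unfamiliar with BTrees, a tiny self-contained demonstration of the set operations this traversal relies on (real BTrees API; the sample values are arbitrary):

import BTrees

TreeSet = BTrees.family64.II.TreeSet
set_difference = BTrees.family64.II.difference

keep_set = TreeSet((1, 2, 3))         # OIDs already marked reachable
children = TreeSet((2, 3, 4, 5))      # OIDs referenced in this pass
parents = set_difference(children, keep_set)  # only the newly found OIDs
keep_set.update(parents)
print(list(keep_set))                 # [1, 2, 3, 4, 5]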
Example #13
    def generic_move_from_temp(self, cursor, tid, txn_has_blobs):
        """Moved the temporarily stored objects to permanent storage.

        Returns the list of oids stored.
        """
        if self.keep_history:
            if self.database_type == 'oracle':
                stmt = """
                INSERT INTO object_state
                    (zoid, tid, prev_tid, md5, state_size, state)
                SELECT zoid, :1, prev_tid, md5,
                    COALESCE(LENGTH(state), 0), state
                FROM temp_store
                """
            else:
                stmt = """
                INSERT INTO object_state
                    (zoid, tid, prev_tid, md5, state_size, state)
                SELECT zoid, %s, prev_tid, md5,
                    COALESCE(LENGTH(state), 0), state
                FROM temp_store
                """
            cursor.execute(stmt, (tid, ))

        else:
            if self.database_type == 'mysql':
                stmt = """
                REPLACE INTO object_state (zoid, tid, state_size, state)
                SELECT zoid, %s, COALESCE(LENGTH(state), 0), state
                FROM temp_store
                """
                cursor.execute(stmt, (tid, ))

            else:
                stmt = """
                DELETE FROM object_state
                WHERE zoid IN (SELECT zoid FROM temp_store)
                """
                cursor.execute(stmt)

                if self.database_type == 'oracle':
                    stmt = """
                    INSERT INTO object_state (zoid, tid, state_size, state)
                    SELECT zoid, :1, COALESCE(LENGTH(state), 0), state
                    FROM temp_store
                    """
                else:
                    stmt = """
                    INSERT INTO object_state (zoid, tid, state_size, state)
                    SELECT zoid, %s, COALESCE(LENGTH(state), 0), state
                    FROM temp_store
                    """
                cursor.execute(stmt, (tid, ))

            if txn_has_blobs:
                stmt = """
                DELETE FROM blob_chunk
                WHERE zoid IN (SELECT zoid FROM temp_store)
                """
                cursor.execute(stmt)

        if txn_has_blobs:
            if self.database_type == 'oracle':
                stmt = """
                INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk)
                SELECT zoid, :1, chunk_num, chunk
                FROM temp_blob_chunk
                """
            else:
                stmt = """
                INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk)
                SELECT zoid, %s, chunk_num, chunk
                FROM temp_blob_chunk
                """
            cursor.execute(stmt, (tid, ))

        stmt = """
        SELECT zoid FROM temp_store
        """
        cursor.execute(stmt)
        return [oid for (oid, ) in fetchmany(cursor)]
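
The Oracle branches above exist only because of DB-API paramstyle: cx_Oracle binds positionally with :1 while MySQLdb and psycopg2 use %s. A hedged helper that would collapse the duplicated statements (the method name is invented for illustration):

    def _tid_placeholder(self):
        # cx_Oracle uses the 'numeric' paramstyle; psycopg2 and MySQLdb
        # use 'format'/'pyformat'.
        return ':1' if self.database_type == 'oracle' else '%s'

    # Example use:
    # stmt = ("INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk) "
    #         "SELECT zoid, %s, chunk_num, chunk FROM temp_blob_chunk"
    #         % (self._tid_placeholder(),))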
Example #14
    def pack(self, pack_tid, sleep=None, packed_func=None):
        """Pack.  Requires the information provided by pre_pack."""

        # Read committed mode is sufficient.
        conn, cursor = self.connmanager.open()
        try:
            try:
                stmt = """
                SELECT transaction.tid,
                    CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END,
                    CASE WHEN pack_state_tid.tid IS NOT NULL THEN 1 ELSE 0 END
                FROM transaction
                    LEFT JOIN pack_state_tid ON (
                        transaction.tid = pack_state_tid.tid)
                WHERE transaction.tid > 0
                    AND transaction.tid <= %(pack_tid)s
                    AND (packed = %(FALSE)s OR pack_state_tid.tid IS NOT NULL)
                """
                self.runner.run_script_stmt(
                    cursor, stmt, {'pack_tid': pack_tid})
                tid_rows = list(fetchmany(cursor))
                tid_rows.sort()  # oldest first

                total = len(tid_rows)
                log.info("pack: will pack %d transaction(s)", total)

                stmt = self._script_create_temp_pack_visit
                if stmt:
                    self.runner.run_script(cursor, stmt)

                # Hold the commit lock while packing to prevent deadlocks.
                # Pack in small batches of transactions only after we are able
                # to obtain a commit lock in order to minimize the
                # interruption of concurrent write operations.
                start = time.time()
                packed_list = []
                counter, lastreport, statecounter = 0, 0, 0
                # We'll report on progress in at most .1% step increments
                reportstep = max(total // 1000, 1)  # PY3: int division

                self._pause_pack_until_lock(cursor, sleep)
                for tid, packed, has_removable in tid_rows:
                    self._pack_transaction(
                        cursor, pack_tid, tid, packed, has_removable,
                        packed_list)
                    counter += 1
                    if time.time() >= start + self.options.pack_batch_timeout:
                        conn.commit()
                        if packed_func is not None:
                            for oid, tid in packed_list:
                                packed_func(oid, tid)
                        statecounter += len(packed_list)
                        if counter >= lastreport + reportstep:
                            log.info("pack: packed %d (%.1f%%) transaction(s), "
                                "affecting %d states",
                                counter, counter/float(total)*100,
                                statecounter)
                            lastreport = counter // reportstep * reportstep
                        del packed_list[:]
                        self.locker.release_commit_lock(cursor)
                        self._pause_pack_until_lock(cursor, sleep)
                        start = time.time()
                if packed_func is not None:
                    for oid, tid in packed_list:
                        packed_func(oid, tid)
                packed_list = None

                self._pack_cleanup(conn, cursor, sleep)

            except Exception:
                log.exception("pack: failed")
                conn.rollback()
                raise

            else:
                log.info("pack: finished successfully")
                conn.commit()

        finally:
            self.connmanager.close(conn, cursor)
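
A hypothetical call sequence for this API (packer and report are illustrative names): pre_pack decides what is removable, then pack runs with an optional per-object callback and a sleep function used while waiting for the commit lock:

import time

def report(oid, tid):
    print("packed oid %d at tid %d" % (oid, tid))

packer.pre_pack(pack_tid, get_references)  # assumed companion step
packer.pack(pack_tid, sleep=time.sleep, packed_func=report)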
Example #15
 def _fetchmany(self, cursor):
     return fetchmany(cursor)
Example #16
    def _add_refs_for_oids(self, cursor, oids, get_references):
        """Fill object_refs with the states for some objects.

        Returns the number of references added.
        """
        oid_list = ','.join(str(oid) for oid in oids)
        use_base64 = (self.database_type == 'postgresql')

        if use_base64:
            stmt = """
            SELECT zoid, tid, encode(state, 'base64')
            FROM object_state
            WHERE zoid IN (%s)
            """ % oid_list
        else:
            stmt = """
            SELECT zoid, tid, state
            FROM object_state
            WHERE zoid IN (%s)
            """ % oid_list
        self.runner.run_script_stmt(cursor, stmt)

        add_objects = []
        add_refs = []
        for from_oid, tid, state in fetchmany(cursor):
            if hasattr(state, 'read'):
                # Oracle
                state = state.read()
            add_objects.append((from_oid, tid))
            if state:
                state = decode_bytes_param(state, use_base64)
                try:
                    to_oids = get_references(state)
                except Exception:
                    log.error(
                        "pre_pack: can't unpickle "
                        "object %d in transaction %d; state length = %d",
                        from_oid, tid, len(state))
                    raise
                for to_oid in to_oids:
                    add_refs.append((from_oid, tid, to_oid))

        if not add_objects:
            return 0

        stmt = "DELETE FROM object_refs_added WHERE zoid IN (%s)" % oid_list
        self.runner.run_script_stmt(cursor, stmt)
        stmt = "DELETE FROM object_ref WHERE zoid IN (%s)" % oid_list
        self.runner.run_script_stmt(cursor, stmt)

        stmt = """
        INSERT INTO object_ref (zoid, tid, to_zoid) VALUES (%s, %s, %s)
        """
        self.runner.run_many(cursor, stmt, add_refs)

        stmt = """
        INSERT INTO object_refs_added (zoid, tid) VALUES (%s, %s)
        """
        self.runner.run_many(cursor, stmt, add_objects)

        return len(add_refs)
Example #17
    def pack(self, pack_tid, sleep=None, packed_func=None):
        """Run garbage collection.

        Requires the information provided by pre_pack.
        """
        # Read committed mode is sufficient.
        conn, cursor = self.connmanager.open()
        try:
            try:
                stmt = """
                SELECT zoid, keep_tid
                FROM pack_object
                WHERE keep = %(FALSE)s
                """
                self.runner.run_script_stmt(cursor, stmt)
                to_remove = list(fetchmany(cursor))

                total = len(to_remove)
                log.info("pack: will remove %d object(s)", total)

                # Hold the commit lock while packing to prevent deadlocks.
                # Pack in small batches of transactions only after we are able
                # to obtain a commit lock in order to minimize the
                # interruption of concurrent write operations.
                start = time.time()
                packed_list = []
                # We'll report on progress in at most .1% step increments
                lastreport, reportstep = 0, max(total // 1000, 1)  # PY3: int division

                self._pause_pack_until_lock(cursor, sleep)
                while to_remove:
                    items = to_remove[:100]
                    del to_remove[:100]
                    stmt = """
                    DELETE FROM object_state
                    WHERE zoid = %s AND tid = %s
                    """
                    self.runner.run_many(cursor, stmt, items)
                    packed_list.extend(items)

                    if time.time() >= start + self.options.pack_batch_timeout:
                        conn.commit()
                        if packed_func is not None:
                            for oid, tid in packed_list:
                                packed_func(oid, tid)
                        del packed_list[:]
                        counter = total - len(to_remove)
                        if counter >= lastreport + reportstep:
                            log.info("pack: removed %d (%.1f%%) state(s)",
                                     counter, counter/float(total)*100)
                            lastreport = counter // reportstep * reportstep
                        self.locker.release_commit_lock(cursor)
                        self._pause_pack_until_lock(cursor, sleep)
                        start = time.time()

                if packed_func is not None:
                    for oid, tid in packed_list:
                        packed_func(oid, tid)
                packed_list = None

                self._pack_cleanup(conn, cursor)

            except Exception:
                log.exception("pack: failed")
                conn.rollback()
                raise

            else:
                log.info("pack: finished successfully")
                conn.commit()

        finally:
            self.connmanager.close(conn, cursor)
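
run_many, used above to delete states in batches of 100, follows the DB-API executemany contract. A minimal sketch of such a runner (real runners may translate placeholders or chunk the rows; this is an assumption):

class ScriptRunner(object):
    def run_many(self, cursor, stmt, items):
        """Execute stmt once per parameter row via the driver."""
        if items:
            cursor.executemany(stmt, items)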
Example #18
    def generic_move_from_temp(self, cursor, tid, txn_has_blobs):
        """Moved the temporarily stored objects to permanent storage.

        Returns the list of oids stored.
        """
        if self.keep_history:
            if self.database_type == 'oracle':
                stmt = """
                INSERT INTO object_state
                    (zoid, tid, prev_tid, md5, state_size, state)
                SELECT zoid, :1, prev_tid, md5,
                    COALESCE(LENGTH(state), 0), state
                FROM temp_store
                """
            else:
                stmt = """
                INSERT INTO object_state
                    (zoid, tid, prev_tid, md5, state_size, state)
                SELECT zoid, %s, prev_tid, md5,
                    COALESCE(LENGTH(state), 0), state
                FROM temp_store
                """
            cursor.execute(stmt, (tid,))

        else:
            if self.database_type == 'mysql':
                stmt = """
                REPLACE INTO object_state (zoid, tid, state_size, state)
                SELECT zoid, %s, COALESCE(LENGTH(state), 0), state
                FROM temp_store
                """
                cursor.execute(stmt, (tid,))

            else:
                stmt = """
                DELETE FROM object_state
                WHERE zoid IN (SELECT zoid FROM temp_store)
                """
                cursor.execute(stmt)

                if self.database_type == 'oracle':
                    stmt = """
                    INSERT INTO object_state (zoid, tid, state_size, state)
                    SELECT zoid, :1, COALESCE(LENGTH(state), 0), state
                    FROM temp_store
                    """
                else:
                    stmt = """
                    INSERT INTO object_state (zoid, tid, state_size, state)
                    SELECT zoid, %s, COALESCE(LENGTH(state), 0), state
                    FROM temp_store
                    """
                cursor.execute(stmt, (tid,))

            if txn_has_blobs:
                stmt = """
                DELETE FROM blob_chunk
                WHERE zoid IN (SELECT zoid FROM temp_store)
                """
                cursor.execute(stmt)

        if txn_has_blobs:
            if self.database_type == 'oracle':
                stmt = """
                INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk)
                SELECT zoid, :1, chunk_num, chunk
                FROM temp_blob_chunk
                """
            else:
                stmt = """
                INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk)
                SELECT zoid, %s, chunk_num, chunk
                FROM temp_blob_chunk
                """
            cursor.execute(stmt, (tid,))

        stmt = """
        SELECT zoid FROM temp_store
        """
        cursor.execute(stmt)
        return [oid for (oid,) in fetchmany(cursor)]
Example #19
 def _fetchmany(self, cursor):
     return fetchmany(cursor)
Example #20
    def pack(self, pack_tid, sleep=None, packed_func=None):
        """Run garbage collection.

        Requires the information provided by pre_pack.
        """
        # Read committed mode is sufficient.
        conn, cursor = self.connmanager.open()
        try:
            try:
                stmt = """
                SELECT zoid, keep_tid
                FROM pack_object
                WHERE keep = %(FALSE)s
                """
                self.runner.run_script_stmt(cursor, stmt)
                to_remove = list(fetchmany(cursor))

                total = len(to_remove)
                log.info("pack: will remove %d object(s)", total)

                # Hold the commit lock while packing to prevent deadlocks.
                # Pack in small batches of transactions only after we are able
                # to obtain a commit lock in order to minimize the
                # interruption of concurrent write operations.
                start = time.time()
                packed_list = []
                # We'll report on progress in at most .1% step increments
                lastreport, reportstep = 0, max(total // 1000, 1)  # PY3: int division

                self._pause_pack_until_lock(cursor, sleep)
                while to_remove:
                    items = to_remove[:100]
                    del to_remove[:100]
                    stmt = """
                    DELETE FROM object_state
                    WHERE zoid = %s AND tid = %s
                    """
                    self.runner.run_many(cursor, stmt, items)
                    packed_list.extend(items)

                    if time.time() >= start + self.options.pack_batch_timeout:
                        conn.commit()
                        if packed_func is not None:
                            for oid, tid in packed_list:
                                packed_func(oid, tid)
                        del packed_list[:]
                        counter = total - len(to_remove)
                        if counter >= lastreport + reportstep:
                            log.info("pack: removed %d (%.1f%%) state(s)",
                                     counter, counter / float(total) * 100)
                            lastreport = counter // reportstep * reportstep
                        self.locker.release_commit_lock(cursor)
                        self._pause_pack_until_lock(cursor, sleep)
                        start = time.time()

                if packed_func is not None:
                    for oid, tid in packed_list:
                        packed_func(oid, tid)
                packed_list = None

                self._pack_cleanup(conn, cursor)

            except Exception:
                log.exception("pack: failed")
                conn.rollback()
                raise

            else:
                log.info("pack: finished successfully")
                conn.commit()

        finally:
            self.connmanager.close(conn, cursor)
Example #21
    def _traverse_graph(self, cursor):
        """Visit the entire object graph to find out what should be kept.

        Sets the pack_object.keep flags.
        """
        log.info("pre_pack: downloading pack_object and object_ref.")

        # Note: TreeSet can be updated at random much faster than Set,
        # but TreeSet consumes more memory. (Random TreeSet updates are
        # probably O(log n) while random Set updates are probably O(n).
        # OTOH, adding to Sets or TreeSets in order is an O(1) operation.)
        Set = BTrees.family64.II.Set
        TreeSet = BTrees.family64.II.TreeSet
        Bucket = BTrees.family64.IO.Bucket
        set_difference = BTrees.family64.II.difference

        # Download the list of root objects to keep from pack_object.
        keep_set = TreeSet()
        stmt = """
        SELECT zoid
        FROM pack_object
        WHERE keep = %(TRUE)s
        """
        self.runner.run_script_stmt(cursor, stmt)
        for from_oid, in fetchmany(cursor):
            keep_set.insert(from_oid)

        # Download the list of object references into all_refs.
        all_refs = Bucket()  # {from_oid: set([to_oid])}
        # Note the Oracle optimizer hints in the following statement; MySQL
        # and PostgreSQL ignore these. Oracle fails to notice that pack_object
        # is now filled and chooses the wrong execution plan, completely
        # killing this query on large RelStorage databases, unless these hints
        # are included.
        stmt = """
        SELECT
            /*+ FULL(object_ref) */
            /*+ FULL(pack_object) */
            object_ref.zoid, object_ref.to_zoid
        FROM object_ref
            JOIN pack_object ON (object_ref.zoid = pack_object.zoid)
        WHERE object_ref.tid >= pack_object.keep_tid
        ORDER BY object_ref.zoid, object_ref.to_zoid
        """
        # While downloading the OIDs, move them to Set and Bucket
        # objects. A Set takes a lot less RAM than Python integer sets.

        # Grouped by object_ref.zoid, store all object_ref.to_zoid in sets.
        self.runner.run_script_stmt(cursor, stmt)
        for from_oid, rows in groupby(fetchmany(cursor), itemgetter(0)):
            d = all_refs.get(from_oid)
            if d is None:
                all_refs[from_oid] = d = Set()
            d.update(row[1] for row in rows)

        # Traverse the object graph.  Add all of the reachable OIDs
        # to keep_set.
        log.info("pre_pack: traversing the object graph "
                 "to find reachable objects.")
        parents = Set(keep_set)
        pass_num = 0
        while parents:
            pass_num += 1
            children = TreeSet()
            for parent in parents:
                to_oids = all_refs.get(parent)
                if to_oids:
                    children.update(to_oids)
            parents = set_difference(children, keep_set)
            keep_set.update(parents)
            log.debug(
                "pre_pack: found %d more referenced object(s) in "
                "pass %d", len(parents), pass_num)

        # Set pack_object.keep for all OIDs in keep_set.
        del all_refs  # Free some RAM
        log.info("pre_pack: marking objects reachable: %d", len(keep_set))
        batch = []

        def upload_batch():
            oids_str = ','.join(str(oid) for oid in batch)
            del batch[:]
            stmt = """
            UPDATE pack_object SET keep = %%(TRUE)s, visited = %%(TRUE)s
            WHERE zoid IN (%s)
            """ % oids_str
            self.runner.run_script_stmt(cursor, stmt)

        for oid in keep_set:
            batch.append(oid)
            if len(batch) >= 1000:
                upload_batch()
        if batch:
            upload_batch()
Example #22
    def pack(self, pack_tid, sleep=None, packed_func=None):
        """Pack.  Requires the information provided by pre_pack."""

        # Read committed mode is sufficient.
        conn, cursor = self.connmanager.open()
        try:
            try:
                stmt = """
                SELECT transaction.tid,
                    CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END,
                    CASE WHEN pack_state_tid.tid IS NOT NULL THEN 1 ELSE 0 END
                FROM transaction
                    LEFT JOIN pack_state_tid ON (
                        transaction.tid = pack_state_tid.tid)
                WHERE transaction.tid > 0
                    AND transaction.tid <= %(pack_tid)s
                    AND (packed = %(FALSE)s OR pack_state_tid.tid IS NOT NULL)
                """
                self.runner.run_script_stmt(cursor, stmt,
                                            {'pack_tid': pack_tid})
                tid_rows = list(fetchmany(cursor))
                tid_rows.sort()  # oldest first

                total = len(tid_rows)
                log.info("pack: will pack %d transaction(s)", total)

                stmt = self._script_create_temp_pack_visit
                if stmt:
                    self.runner.run_script(cursor, stmt)

                # Hold the commit lock while packing to prevent deadlocks.
                # Pack in small batches of transactions only after we are able
                # to obtain a commit lock in order to minimize the
                # interruption of concurrent write operations.
                start = time.time()
                packed_list = []
                counter, lastreport, statecounter = 0, 0, 0
                # We'll report on progress in at most .1% step increments
                reportstep = max(total // 1000, 1)  # PY3: int division

                self._pause_pack_until_lock(cursor, sleep)
                for tid, packed, has_removable in tid_rows:
                    self._pack_transaction(cursor, pack_tid, tid, packed,
                                           has_removable, packed_list)
                    counter += 1
                    if time.time() >= start + self.options.pack_batch_timeout:
                        conn.commit()
                        if packed_func is not None:
                            for oid, tid in packed_list:
                                packed_func(oid, tid)
                        statecounter += len(packed_list)
                        if counter >= lastreport + reportstep:
                            log.info(
                                "pack: packed %d (%.1f%%) transaction(s), "
                                "affecting %d states", counter,
                                counter / float(total) * 100, statecounter)
                            lastreport = counter // reportstep * reportstep
                        del packed_list[:]
                        self.locker.release_commit_lock(cursor)
                        self._pause_pack_until_lock(cursor, sleep)
                        start = time.time()
                if packed_func is not None:
                    for oid, tid in packed_list:
                        packed_func(oid, tid)
                packed_list = None

                self._pack_cleanup(conn, cursor, sleep)

            except Exception:
                log.exception("pack: failed")
                conn.rollback()
                raise

            else:
                log.info("pack: finished successfully")
                conn.commit()

        finally:
            self.connmanager.close(conn, cursor)