Esempio n. 1
0
def bigmark():
    """Benchmark TreeMarker on a large, randomly generated object graph.

    Generates roughly 20M references between 10M objects: OID 0 refers to
    the first 20 objects, and about one in five of the remaining objects
    gets between 0 and 20 references to randomly chosen objects.  It then
    marks everything reachable from OID 0 and logs the number of
    references, the number of reachable objects and the number of passes
    reported by ``TreeMarker.mark``.

    Configures the root logger at DEBUG level as a side effect.
    """

    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)s [%(name)s] %(message)s')

    oid_count = 10 * 1000 * 1000
    # Every OID used below is a multiple of k, which keeps the OIDs sparse.
    # NOTE(review): presumably chosen to exercise TreeMarker with widely
    # spaced OIDs -- confirm against the TreeMarker implementation.
    k = 10000

    log.info("Generating random references.")

    marker = TreeMarker()
    # Seed the root object (OID 0) with references to the first 20 objects.
    marker.add_refs([(0, i * k) for i in range(20)])
    refcount = 20
    for i in xrange(1, oid_count):
        if random() < 0.2:
            # randint() includes both endpoints, so the upper bound must be
            # oid_count - 1 to keep every target among the oid_count objects.
            refs = [
                (i * k, randint(0, oid_count - 1) * k)
                for _ in range(randint(0, 20))
            ]
            marker.add_refs(refs)
            refcount += len(refs)

    log.info("Generated %d references.", refcount)

    log.info("Finding reachable objects.")

    pass_count = marker.mark([0])

    log.info("Found %d reachable objects in %d passes.",
             marker.reachable_count, pass_count)
Esempio n. 2
0
def bigmark():
    """Benchmark TreeMarker on a large, randomly generated object graph.

    Generates roughly 20M references between 10M objects: OID 0 refers to
    the first 20 objects, and about one in five of the remaining objects
    gets between 0 and 20 references to randomly chosen objects.  It then
    marks everything reachable from OID 0 and logs the number of
    references, the number of reachable objects and the number of passes
    reported by ``TreeMarker.mark``.

    Configures the root logger at DEBUG level as a side effect.
    """

    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)s [%(name)s] %(message)s')

    oid_count = 10 * 1000 * 1000
    # Every OID used below is a multiple of k, which keeps the OIDs sparse.
    # NOTE(review): presumably chosen to exercise TreeMarker with widely
    # spaced OIDs -- confirm against the TreeMarker implementation.
    k = 10000

    log.info("Generating random references.")

    marker = TreeMarker()
    # Seed the root object (OID 0) with references to the first 20 objects.
    marker.add_refs([(0, i * k) for i in range(20)])
    refcount = 20
    for i in xrange(1, oid_count):
        if random() < 0.2:
            # randint() includes both endpoints, so the upper bound must be
            # oid_count - 1 to keep every target among the oid_count objects.
            refs = [
                (i * k, randint(0, oid_count - 1) * k)
                for _ in range(randint(0, 20))
            ]
            marker.add_refs(refs)
            refcount += len(refs)

    log.info("Generated %d references.", refcount)

    log.info("Finding reachable objects.")

    pass_count = marker.mark([0])

    log.info(
        "Found %d reachable objects in %d passes.",
        marker.reachable_count, pass_count)
Esempio n. 3
0
    def _traverse_graph(self, cursor):
        """Walk the whole object graph and decide which objects to keep.

        The references are loaded from object_ref into a TreeMarker, the
        marker is seeded with the objects whose keep flag is already true,
        and every object found reachable is written back by setting
        pack_object.keep (and pack_object.visited).
        """
        log.info("pre_pack: downloading pack_object and object_ref.")

        def fetch_chunks():
            # Yield the pending result set of ``cursor`` 10000 rows at a
            # time, stopping at the first empty chunk.
            chunk = cursor.fetchmany(10000)
            while chunk:
                yield chunk
                chunk = cursor.fetchmany(10000)

        tree = TreeMarker()

        # Step 1: feed every (zoid, to_zoid) reference into the TreeMarker.
        #
        # The statement carries Oracle optimizer hints, which MySQL and
        # PostgreSQL ignore.  Without them Oracle does not notice that
        # pack_object has just been filled, picks the wrong execution plan
        # and makes this query unusable on large RelStorage databases.
        refs_sql = """
        SELECT
            /*+ FULL(object_ref) */
            /*+ FULL(pack_object) */
            object_ref.zoid, object_ref.to_zoid
        FROM object_ref
            JOIN pack_object ON (object_ref.zoid = pack_object.zoid)
        WHERE object_ref.tid >= pack_object.keep_tid
        ORDER BY object_ref.zoid, object_ref.to_zoid
        """
        self.runner.run_script_stmt(cursor, refs_sql)
        for chunk in fetch_chunks():
            tree.add_refs(chunk)

        # Step 2: mark everything reachable from the objects already kept.

        log.info(
            "pre_pack: traversing the object graph to find reachable objects.")
        roots_sql = """
        SELECT zoid
        FROM pack_object
        WHERE keep = %(TRUE)s
        """
        self.runner.run_script_stmt(cursor, roots_sql)
        for chunk in fetch_chunks():
            tree.mark(zoid for (zoid,) in chunk)

        tree.free_refs()

        # Step 3: write the reachable set back, at most 1000 OIDs per UPDATE.

        log.info("pre_pack: marking objects reachable: %d",
                 tree.reachable_count)

        pending = []

        def flush_pending():
            # Empty ``pending`` before running the statement, then flag the
            # collected OIDs as kept and visited.
            id_list = ','.join(map(str, pending))
            pending[:] = []
            update_sql = """
            UPDATE pack_object SET keep = %%(TRUE)s, visited = %%(TRUE)s
            WHERE zoid IN (%s)
            """ % id_list
            self.runner.run_script_stmt(cursor, update_sql)

        for zoid in tree.reachable:
            pending.append(zoid)
            if len(pending) >= 1000:
                flush_pending()
        if pending:
            flush_pending()
Esempio n. 4
0
    def _traverse_graph(self, cursor):
        """Walk the whole object graph and decide which objects to keep.

        The references are loaded from object_ref into a TreeMarker, the
        marker is seeded with the objects whose keep flag is already true,
        and every object found reachable is written back by setting
        pack_object.keep (and pack_object.visited).
        """
        log.info("pre_pack: downloading pack_object and object_ref.")

        def fetch_chunks():
            # Yield the pending result set of ``cursor`` 10000 rows at a
            # time, stopping at the first empty chunk.
            chunk = cursor.fetchmany(10000)
            while chunk:
                yield chunk
                chunk = cursor.fetchmany(10000)

        tree = TreeMarker()

        # Step 1: feed every (zoid, to_zoid) reference into the TreeMarker.
        #
        # The statement carries Oracle optimizer hints, which MySQL and
        # PostgreSQL ignore.  Without them Oracle does not notice that
        # pack_object has just been filled, picks the wrong execution plan
        # and makes this query unusable on large RelStorage databases.
        refs_sql = """
        SELECT
            /*+ FULL(object_ref) */
            /*+ FULL(pack_object) */
            object_ref.zoid, object_ref.to_zoid
        FROM object_ref
            JOIN pack_object ON (object_ref.zoid = pack_object.zoid)
        WHERE object_ref.tid >= pack_object.keep_tid
        ORDER BY object_ref.zoid, object_ref.to_zoid
        """
        self.runner.run_script_stmt(cursor, refs_sql)
        for chunk in fetch_chunks():
            tree.add_refs(chunk)

        # Step 2: mark everything reachable from the objects already kept.

        log.info(
            "pre_pack: traversing the object graph to find reachable objects.")
        roots_sql = """
        SELECT zoid
        FROM pack_object
        WHERE keep = %(TRUE)s
        """
        self.runner.run_script_stmt(cursor, roots_sql)
        for chunk in fetch_chunks():
            tree.mark(zoid for (zoid,) in chunk)

        tree.free_refs()

        # Step 3: write the reachable set back, at most 1000 OIDs per UPDATE.

        log.info(
            "pre_pack: marking objects reachable: %d",
            tree.reachable_count)

        pending = []

        def flush_pending():
            # Empty ``pending`` before running the statement, then flag the
            # collected OIDs as kept and visited.
            id_list = ','.join(map(str, pending))
            pending[:] = []
            update_sql = """
            UPDATE pack_object SET keep = %%(TRUE)s, visited = %%(TRUE)s
            WHERE zoid IN (%s)
            """ % id_list
            self.runner.run_script_stmt(cursor, update_sql)

        for zoid in tree.reachable:
            pending.append(zoid)
            if len(pending) >= 1000:
                flush_pending()
        if pending:
            flush_pending()