def bigmark():
    """Exercise TreeMarker on a large random graph (~20M refs, 10M objects).

    Configures DEBUG logging, generates random references between objects,
    loads them into a ``TreeMarker``, marks everything reachable from
    object 0, and logs the number of references generated, the number of
    reachable objects, and the number of marking passes.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)s [%(name)s] %(message)s')
    oid_count = 10 * 1000 * 1000
    # Object ids are spaced k apart (0, k, 2k, ...) instead of consecutive.
    k = 10000
    log.info("Generating random references.")
    marker = TreeMarker()
    # The root (oid 0) gets 20 outgoing references; marking starts there.
    marker.add_refs([(0, i * k) for i in range(20)])
    refcount = 20
    for i in xrange(1, oid_count):
        # Roughly one object in five receives outgoing references.
        if random() < 0.2:
            # randint() includes both endpoints, so the upper bound has to
            # be oid_count - 1; otherwise a reference could point one past
            # the last generated object.
            refs = [
                (i * k, randint(0, oid_count - 1) * k)
                for _ in range(randint(0, 20))]
            marker.add_refs(refs)
            refcount += len(refs)
    log.info("Generated %d references.", refcount)
    log.info("Finding reachable objects.")
    pass_count = marker.mark([0])
    log.info(
        "Found %d reachable objects in %d passes.",
        marker.reachable_count, pass_count)
def bigmark():
    """Follow ~20M references between 10M objects.

    Sets up DEBUG logging, builds a random reference graph in a
    ``TreeMarker``, marks the objects reachable from object 0, and logs
    the totals (references, reachable objects, marking passes).
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)s [%(name)s] %(message)s')
    oid_count = 10 * 1000 * 1000
    # Multiplier that spreads the object ids out: object i has oid i * k.
    k = 10000
    # Highest valid object index. randint() is inclusive at both ends, so
    # using oid_count itself as the bound would allow a reference to an
    # object that is never generated.
    last_index = oid_count - 1

    log.info("Generating random references.")
    marker = TreeMarker()
    # Give the root object 20 outgoing references before adding random ones.
    marker.add_refs([(0, i * k) for i in range(20)])
    refcount = 20
    for i in xrange(1, oid_count):
        # About 20% of the objects get between 0 and 20 random references.
        if random() < 0.2:
            source_oid = i * k
            refs = [
                (source_oid, randint(0, last_index) * k)
                for _ in range(randint(0, 20))]
            marker.add_refs(refs)
            refcount += len(refs)
    log.info("Generated %d references.", refcount)

    log.info("Finding reachable objects.")
    pass_count = marker.mark([0])
    log.info("Found %d reachable objects in %d passes.",
             marker.reachable_count, pass_count)
def _traverse_graph(self, cursor):
    """Walk the whole object graph to decide which objects are kept.

    Loads the object references into a TreeMarker, marks everything
    reachable from the objects already flagged ``keep``, and writes the
    result back by setting ``pack_object.keep`` (and ``visited``).
    """
    run_stmt = self.runner.run_script_stmt

    def fetch_chunks():
        # Yield successive fetchmany() results until the cursor runs dry.
        while True:
            chunk = cursor.fetchmany(10000)
            if not chunk:
                return
            yield chunk

    log.info("pre_pack: downloading pack_object and object_ref.")
    marker = TreeMarker()

    # Pull every object reference into the TreeMarker.
    # The Oracle optimizer hints below matter: MySQL and PostgreSQL ignore
    # them, but Oracle does not notice that pack_object has just been
    # filled and picks the wrong execution plan, which completely kills
    # this query on large RelStorage databases unless the hints are
    # present.
    refs_sql = (
        " SELECT /*+ FULL(object_ref) */ /*+ FULL(pack_object) */"
        " object_ref.zoid, object_ref.to_zoid"
        " FROM object_ref"
        " JOIN pack_object ON (object_ref.zoid = pack_object.zoid)"
        " WHERE object_ref.tid >= pack_object.keep_tid"
        " ORDER BY object_ref.zoid, object_ref.to_zoid "
    )
    run_stmt(cursor, refs_sql)
    for ref_rows in fetch_chunks():
        marker.add_refs(ref_rows)

    # Let the TreeMarker work out which objects are reachable, starting
    # from the objects that are already flagged as kept.
    log.info(
        "pre_pack: traversing the object graph to find reachable objects.")
    roots_sql = " SELECT zoid FROM pack_object WHERE keep = %(TRUE)s "
    run_stmt(cursor, roots_sql)
    for root_rows in fetch_chunks():
        marker.mark(zoid for (zoid,) in root_rows)

    marker.free_refs()

    # Push the TreeMarker results back into the database in batches.
    log.info(
        "pre_pack: marking objects reachable: %d", marker.reachable_count)

    def flush(oids):
        # Flag one batch of oids as kept and visited.
        id_list = ','.join(str(oid) for oid in oids)
        update_sql = (
            " UPDATE pack_object"
            " SET keep = %%(TRUE)s, visited = %%(TRUE)s"
            " WHERE zoid IN (%s) "
        ) % id_list
        run_stmt(cursor, update_sql)

    pending = []
    for oid in marker.reachable:
        pending.append(oid)
        if len(pending) >= 1000:
            flush(pending)
            pending = []
    if pending:
        flush(pending)
def _traverse_graph(self, cursor):
    """Determine what must be kept by visiting the entire object graph.

    The outcome is recorded in the pack_object.keep flags.
    """
    fetch_size = 10000
    upload_size = 1000

    log.info("pre_pack: downloading pack_object and object_ref.")
    marker = TreeMarker()

    # Step 1: download the list of object references into the TreeMarker.
    # Keep the Oracle optimizer hints in this statement. MySQL and
    # PostgreSQL ignore them; Oracle, however, fails to notice that
    # pack_object is now filled and chooses the wrong execution plan,
    # completely killing this query on large RelStorage databases when
    # the hints are missing.
    stmt = (
        " SELECT /*+ FULL(object_ref) */ /*+ FULL(pack_object) */"
        " object_ref.zoid, object_ref.to_zoid FROM object_ref"
        " JOIN pack_object ON (object_ref.zoid = pack_object.zoid)"
        " WHERE object_ref.tid >= pack_object.keep_tid"
        " ORDER BY object_ref.zoid, object_ref.to_zoid "
    )
    self.runner.run_script_stmt(cursor, stmt)
    rows = cursor.fetchmany(fetch_size)
    while rows:
        marker.add_refs(rows)
        rows = cursor.fetchmany(fetch_size)

    # Step 2: use the TreeMarker to find all reachable objects, seeding
    # it with the objects currently flagged as kept.
    log.info("pre_pack: traversing the object graph "
             "to find reachable objects.")
    stmt = " SELECT zoid FROM pack_object WHERE keep = %(TRUE)s "
    self.runner.run_script_stmt(cursor, stmt)
    rows = cursor.fetchmany(fetch_size)
    while rows:
        marker.mark(row_oid for (row_oid,) in rows)
        rows = cursor.fetchmany(fetch_size)

    marker.free_refs()

    # Step 3: upload the TreeMarker results to the database.
    log.info("pre_pack: marking objects reachable: %d",
             marker.reachable_count)

    queued = []

    def send_queued():
        # Build the IN (...) list, empty the queue, then run the UPDATE.
        joined = ','.join([str(queued_oid) for queued_oid in queued])
        queued[:] = []
        update = (
            " UPDATE pack_object SET keep = %%(TRUE)s,"
            " visited = %%(TRUE)s WHERE zoid IN (%s) "
        ) % joined
        self.runner.run_script_stmt(cursor, update)

    enqueue = queued.append
    for reachable_oid in marker.reachable:
        enqueue(reachable_oid)
        if not len(queued) < upload_size:
            send_queued()
    if len(queued) > 0:
        send_queued()