def main(self):
    """Run every enabled librarian garbage-collection step, in order.

    Every step is handed the same raw database connection and can be
    switched off individually through its ``skip_*`` option.
    """
    librariangc.log = self.logger
    opts = self.options
    if opts.loglevel <= logging.DEBUG:
        librariangc.debug = True

    # XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection
    # here is wrong. We should either create our own or use
    # Store.execute or cursor() and the transaction module.
    raw_conn = IStore(LibraryFileAlias)._connection._raw_connection

    # Refuse to run if we have significant clock skew between the
    # librarian and the database.
    librariangc.confirm_no_clock_skew(raw_conn)

    # (skip option, librariangc function name) pairs in execution order.
    # Each step issues commit commands as appropriate, which keeps this
    # script transaction friendly. Names are resolved lazily, one step at
    # a time, so options and functions are looked up only when reached.
    steps = (
        ("skip_expiry", "expire_aliases"),
        # First content sweep.
        ("skip_content", "delete_unreferenced_content"),
        ("skip_blobs", "delete_expired_blobs"),
        ("skip_duplicates", "merge_duplicates"),
        ("skip_aliases", "delete_unreferenced_aliases"),
        # Second content sweep.
        ("skip_content", "delete_unreferenced_content"),
        ("skip_files", "delete_unwanted_files"),
    )
    for skip_option, step_name in steps:
        if getattr(opts, skip_option):
            continue
        getattr(librariangc, step_name)(raw_conn)
def test_DeleteUnreferencedContent(self):
    """delete_unreferenced_content removes the file and its database row.

    Merging the two duplicate files leaves one LibraryFileContent without
    any LibraryFileAlias; the collector must delete that content's file
    from disk and leave no unreferenced LibraryFileContent rows behind.
    """
    unreferenced_query = (
        " SELECT LibraryFileContent.id"
        " FROM LibraryFileContent"
        " LEFT OUTER JOIN LibraryFileAlias"
        " ON LibraryFileContent.id = LibraryFileAlias.content"
        " WHERE LibraryFileAlias.id IS NULL"
        " AND LibraryFileContent.id IN (%d, %d) "
    )
    all_unreferenced_query = (
        " SELECT LibraryFileContent.id"
        " FROM LibraryFileContent"
        " LEFT OUTER JOIN LibraryFileAlias"
        " ON LibraryFileContent.id = LibraryFileAlias.content"
        " WHERE LibraryFileAlias.id IS NULL "
    )

    # Merge the duplicates. This creates an
    # unreferenced LibraryFileContent
    librariangc.merge_duplicates(self.con)

    self.ztm.begin()

    # Locate the unreferenced LibraryFileContent
    cur = cursor()
    cur.execute(unreferenced_query % (self.f1_id, self.f2_id))
    results = cur.fetchall()
    self.assertEqual(len(results), 1)
    unreferenced_id = results[0][0]

    self.ztm.abort()

    # Make sure the file exists on disk
    path = librariangc.get_file_path(unreferenced_id)
    self.assertTrue(os.path.exists(path))

    # Delete unreferenced content
    librariangc.delete_unreferenced_content(self.con)

    # Make sure the file is gone
    self.assertFalse(os.path.exists(path))

    # delete_unreferenced_content should have committed
    self.ztm.begin()

    # Make sure the unreferenced entries have all gone
    cur = cursor()
    cur.execute(all_unreferenced_query)
    results = list(cur.fetchall())
    self.assertEqual(
        len(results), 0, 'Too many results %r' % (results,)
        )
def test_DeleteExpiredBlobs(self):
    """Expired blobs are deleted and their library rows become collectable.

    After delete_expired_blobs, the two expired TemporaryBlobStorage rows
    are gone and the unexpired one remains. A following alias and content
    sweep then removes the expired aliases, while content that is still
    linked from elsewhere survives.
    """
    blob_query = " SELECT * FROM TemporaryBlobStorage WHERE id=%s "
    alias_query = " SELECT * FROM LibraryFileAlias WHERE id=%s "
    content_query = " SELECT * FROM LibraryFileContent WHERE id=%s "
    alias_ids_query = (
        "SELECT id FROM LibraryFileAlias WHERE id IN (%s, %s, %s)")

    # Delete expired blobs from the TemporaryBlobStorage table
    librariangc.delete_expired_blobs(self.con)

    cur = self.con.cursor()

    def row_exists(query, row_id):
        # Run a single-id lookup and report whether a row came back.
        cur.execute(query, (row_id,))
        return cur.fetchone() is not None

    # Our expired blob should be gone
    self.assertFalse(row_exists(blob_query, self.expired_blob_id))

    # As should our expired blob linked elsewhere.
    self.assertFalse(row_exists(blob_query, self.expired2_blob_id))

    # But our unexpired blob is still hanging around.
    self.assertTrue(row_exists(blob_query, self.unexpired_blob_id))

    # Now delete our unreferenced aliases and unreferenced content
    # NOTE(review): the results of the two alias_ids_query executions
    # below are never read -- they look like leftover debugging. They are
    # kept so the statements sent on this connection stay the same;
    # confirm before removing.
    cur.execute(
        alias_ids_query,
        (self.expired_lfa_id, self.expired2_lfa_id, self.unexpired_lfa_id))
    librariangc.delete_unreferenced_aliases(self.con)
    librariangc.delete_unreferenced_content(self.con)
    cur.execute(
        alias_ids_query,
        (self.expired_lfa_id, self.expired2_lfa_id, self.unexpired_lfa_id))

    # The first expired blob should now be entirely gone
    self.assertFalse(row_exists(alias_query, self.expired_lfa_id))
    self.assertFalse(row_exists(content_query, self.expired_lfc_id))

    # The second expired blob will have lost its LibraryFileAlias,
    # but the content is still hanging around because something else
    # linked to it.
    self.assertFalse(row_exists(alias_query, self.expired2_lfa_id))
    self.assertTrue(row_exists(content_query, self.expired2_lfc_id))

    # The unexpired blob should be unaffected
    self.assertTrue(row_exists(alias_query, self.unexpired_lfa_id))
    self.assertTrue(row_exists(content_query, self.unexpired_lfc_id))
def test_DeleteUnreferencedContent2(self):
    """Unreferenced content is still cleaned up when its file is missing.

    Like test_DeleteUnreferencedContent, except that the file is
    removed from disk before attempting to remove the unreferenced
    LibraryFileContent.

    Because the garbage collector will remove an unreferenced file from
    disk before it commits the database changes, it is possible that the
    db removal will fail (eg. an exception was raised on COMMIT) leaving
    the rows untouched in the database but no file on disk.
    This is fine, as the next gc run will attempt it again and
    nothing can use unreferenced files anyway. This test ensures
    that this all works.
    """
    unreferenced_query = (
        " SELECT LibraryFileContent.id"
        " FROM LibraryFileContent"
        " LEFT OUTER JOIN LibraryFileAlias"
        " ON LibraryFileContent.id = LibraryFileAlias.content"
        " WHERE LibraryFileAlias.id IS NULL"
        " AND LibraryFileContent.id IN (%d, %d) "
    )
    all_unreferenced_query = (
        " SELECT LibraryFileContent.id"
        " FROM LibraryFileContent"
        " LEFT OUTER JOIN LibraryFileAlias"
        " ON LibraryFileContent.id = LibraryFileAlias.content"
        " WHERE LibraryFileAlias.id IS NULL "
    )

    # Merge the duplicates. This creates an
    # unreferenced LibraryFileContent
    librariangc.merge_duplicates(self.con)

    self.ztm.begin()

    # Locate the unreferenced LibraryFileContent
    cur = cursor()
    cur.execute(unreferenced_query % (self.f1_id, self.f2_id))
    results = cur.fetchall()
    self.assertEqual(len(results), 1)
    unreferenced_id = results[0][0]

    self.ztm.abort()

    # Make sure the file exists on disk
    path = librariangc.get_file_path(unreferenced_id)
    self.assertTrue(os.path.exists(path))

    # Remove the file from disk
    os.unlink(path)
    self.assertFalse(os.path.exists(path))

    # Delete unreferenced content
    librariangc.delete_unreferenced_content(self.con)

    # Make sure the file is gone
    self.assertFalse(os.path.exists(path))

    # delete_unreferenced_content should have committed
    self.ztm.begin()

    # Make sure the unreferenced entries have all gone
    cur = cursor()
    cur.execute(all_unreferenced_query)
    results = list(cur.fetchall())
    self.assertEqual(
        len(results), 0, 'Too many results %r' % (results,)
        )