def main(self):
    """Run every librarian garbage collection pass that is not skipped.

    Hands ``self.logger`` to the ``librariangc`` module, switches on its
    debug flag when the configured log level is DEBUG or lower, checks
    for clock skew and then runs the passes in a fixed order.  Each pass
    is disabled by its ``skip_*`` attribute on ``self.options``.
    """
    librariangc.log = self.logger

    if self.options.loglevel <= logging.DEBUG:
        librariangc.debug = True

    # XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection
    # here is wrong. We should either create our own or use
    # Store.execute or cursor() and the transaction module.
    conn = IStore(LibraryFileAlias)._connection._raw_connection

    # Refuse to run if we have significant clock skew between the
    # librarian and the database.
    librariangc.confirm_no_clock_skew(conn)

    # Ordered (skip option, librariangc function name) pairs.  The
    # unreferenced-content pass is listed twice on purpose: it runs once
    # before and once after the blob, duplicate and alias passes.
    passes = (
        ('skip_expiry', 'expire_aliases'),
        ('skip_content', 'delete_unreferenced_content'),  # First sweep.
        ('skip_blobs', 'delete_expired_blobs'),
        ('skip_duplicates', 'merge_duplicates'),
        ('skip_aliases', 'delete_unreferenced_aliases'),
        ('skip_content', 'delete_unreferenced_content'),  # Second sweep.
        ('skip_files', 'delete_unwanted_files'),
    )

    # Note that each of these steps will issue commit commands as
    # appropriate to make this script transaction friendly.
    for skip_option, step_name in passes:
        if getattr(self.options, skip_option):
            continue
        getattr(librariangc, step_name)(conn)
def test_delete_unwanted_files_follows_symlinks(self):
    # In production, our tree has symlinks in it now. We need to be able
    # to cope.

    # First, let's make sure we have some trash.
    switch_dbuser('testadmin')
    content = 'foo'
    self.client.addFile(
        'foo.txt', len(content), StringIO(content), 'text/plain')
    # Roll back the database changes, leaving the file on disk.
    transaction.abort()

    switch_dbuser(config.librarian_gc.dbuser)

    # Now, we will move the directory containing the trash somewhere else
    # and make a symlink to it.
    original = os.path.join(config.librarian_server.root, '00', '00')
    newdir = tempfile.mkdtemp()
    alt = os.path.join(newdir, '00')
    shutil.move(original, alt)
    os.symlink(alt, original)

    try:
        # Now we will do our thing. This is the actual test. It used to
        # fail.
        librariangc.delete_unwanted_files(self.con)
    finally:
        # Clean up even if the call above raised.  Otherwise the storage
        # tree would stay symlinked into a leaked temporary directory
        # for whatever runs next.
        os.remove(original)
        shutil.move(alt, original)
        shutil.rmtree(newdir)
def main(self):
    """Run every librarian garbage collection pass that is not skipped.

    Hands ``self.logger`` to the ``librariangc`` module, switches on its
    debug flag when the configured log level is DEBUG or lower, checks
    for clock skew and then runs the passes in a fixed order.  Each pass
    is disabled by its ``skip_*`` attribute on ``self.options``.
    """
    librariangc.log = self.logger

    if self.options.loglevel <= logging.DEBUG:
        librariangc.debug = True

    # XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection
    # here is wrong. We should either create our own or use
    # Store.execute or cursor() and the transaction module.
    conn = IStore(LibraryFileAlias)._connection._raw_connection

    # Refuse to run if we have significant clock skew between the
    # librarian and the database.
    librariangc.confirm_no_clock_skew(conn)

    # Ordered (skip option, librariangc function name) pairs.  The
    # unreferenced-content pass is listed twice on purpose: it runs once
    # before and once after the blob, duplicate and alias passes.
    passes = (
        ('skip_expiry', 'expire_aliases'),
        ('skip_content', 'delete_unreferenced_content'),  # First sweep.
        ('skip_blobs', 'delete_expired_blobs'),
        ('skip_duplicates', 'merge_duplicates'),
        ('skip_aliases', 'delete_unreferenced_aliases'),
        ('skip_content', 'delete_unreferenced_content'),  # Second sweep.
        ('skip_files', 'delete_unwanted_files'),
    )

    # Note that each of these steps will issue commit commands as
    # appropriate to make this script transaction friendly.
    for skip_option, step_name in passes:
        if getattr(self.options, skip_option):
            continue
        getattr(librariangc, step_name)(conn)
def test_deleteUnwantedFilesIgnoresNoise(self):
    # Directories with invalid names in the storage area are
    # ignored. They are reported as warnings though.

    # Not a hexadecimal number.
    noisedir1_path = os.path.join(config.librarian_server.root, 'zz')

    # Too long
    noisedir2_path = os.path.join(config.librarian_server.root, '111')

    # Long non-hexadecimal number
    noisedir3_path = os.path.join(config.librarian_server.root, '11.bak')

    try:
        os.mkdir(noisedir1_path)
        os.mkdir(noisedir2_path)
        os.mkdir(noisedir3_path)

        # Files in the noise directories, one per directory.  The third
        # file used to be created in the second directory, which left
        # the third directory empty.
        noisefile1_path = os.path.join(noisedir1_path, 'abc')
        noisefile2_path = os.path.join(noisedir2_path, 'def')
        noisefile3_path = os.path.join(noisedir3_path, 'ghi')
        noise_files = (
            (noisefile1_path, 'hello'),
            (noisefile2_path, 'there'),
            (noisefile3_path, 'testsuite'),
        )
        for noisefile_path, noise in noise_files:
            # Close each file explicitly rather than relying on the
            # garbage collector to flush and release the handle.
            with open(noisefile_path, 'w') as noisefile:
                noisefile.write(noise)

        # Pretend it is tomorrow to ensure the files don't count as
        # recently created, and run the delete_unwanted_files process.
        org_time = librariangc.time

        def tomorrow_time():
            return org_time() + 24 * 60 * 60 + 1

        try:
            librariangc.time = tomorrow_time
            librariangc.delete_unwanted_files(self.con)
        finally:
            librariangc.time = org_time

        # None of the rubbish we created has been touched.
        self.assertTrue(os.path.isdir(noisedir1_path))
        self.assertTrue(os.path.isdir(noisedir2_path))
        self.assertTrue(os.path.isdir(noisedir3_path))
        self.assertTrue(os.path.exists(noisefile1_path))
        self.assertTrue(os.path.exists(noisefile2_path))
        self.assertTrue(os.path.exists(noisefile3_path))
    finally:
        # We need to clean this up ourselves, as the standard librarian
        # cleanup only removes files it knows were valid to avoid
        # accidents.
        shutil.rmtree(noisedir1_path)
        shutil.rmtree(noisedir2_path)
        shutil.rmtree(noisedir3_path)

    # Can't check the ordering, so we'll just check that one of the
    # warnings is there.
    self.assertIn(
        "WARNING Ignoring invalid directory zz",
        librariangc.log.getLogBuffer())
def test_deleteUnwantedFiles(self):
    self.ztm.begin()
    cur = cursor()

    # We may find files in the LibraryFileContent repository
    # that do not have a corresponding LibraryFileContent row.

    # Find a content_id we can easily delete and do so. This row is
    # removed from the database, leaving an orphaned file on the
    # filesystem that should be removed.
    cur.execute("""
        SELECT LibraryFileContent.id
        FROM LibraryFileContent
        LEFT OUTER JOIN LibraryFileAlias
            ON LibraryFileContent.id = content
        WHERE LibraryFileAlias.id IS NULL
        LIMIT 1
        """)
    content_id = cur.fetchone()[0]
    cur.execute("""
        DELETE FROM LibraryFileContent WHERE id=%s
        """, (content_id,))
    self.ztm.commit()

    path = librariangc.get_file_path(content_id)
    self.assertTrue(os.path.exists(path))

    # Ensure delete_unwanted_files does not remove the file, because
    # it will have just been created (has a recent date_created). There
    # is a window between file creation and the garbage collector
    # bothering to remove the file to avoid the race condition where the
    # garbage collector is run whilst a file is being uploaded.
    librariangc.delete_unwanted_files(self.con)
    self.assertTrue(os.path.exists(path))

    # To test removal does occur when we want it to, we need to trick
    # the garbage collector into thinking it is tomorrow.
    org_time = librariangc.time

    def tomorrow_time():
        return org_time() + 24 * 60 * 60 + 1

    try:
        librariangc.time = tomorrow_time
        librariangc.delete_unwanted_files(self.con)
    finally:
        # Always restore the real clock so later tests are unaffected.
        librariangc.time = org_time

    self.assertFalse(os.path.exists(path))

    # Make sure nothing else has been removed from disk
    self.ztm.begin()
    cur = cursor()
    cur.execute("""
        SELECT id FROM LibraryFileContent
        """)
    for content_id in (row[0] for row in cur.fetchall()):
        path = librariangc.get_file_path(content_id)
        self.assertTrue(os.path.exists(path))
def test_delete_unwanted_files_bug437084(self):
    # Regression test for bug 437084: delete_unwanted_files() used to
    # die when the last file it found on disk was an unwanted one.

    # Add a file while connected as 'testadmin'...
    switch_dbuser('testadmin')
    payload = 'foo'
    payload_size = len(payload)
    payload_stream = StringIO(payload)
    self.client.addFile(
        'foo.txt', payload_size, payload_stream, 'text/plain')
    # ...then abort the transaction, so the database changes are rolled
    # back but the file stays on disk.
    transaction.abort()

    gc_dbuser = config.librarian_gc.dbuser
    switch_dbuser(gc_dbuser)

    # The garbage collector should get through this without blowing up.
    librariangc.delete_unwanted_files(self.con)