def main(self):
        """Run the librarian garbage collection phases that are enabled.

        Hands this script's logger to ``librariangc``, switches on its
        debug flag when the configured log level is DEBUG or lower, and
        then runs each phase whose ``skip_*`` option is not set, always in
        the same order.
        """
        librariangc.log = self.logger
        if self.options.loglevel <= logging.DEBUG:
            librariangc.debug = True

        # XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection
        # here is wrong. We should either create our own or use
        # Store.execute or cursor() and the transaction module.
        store = IStore(LibraryFileAlias)
        conn = store._connection._raw_connection

        # Refuse to run if we have significant clock skew between the
        # librarian and the database.
        librariangc.confirm_no_clock_skew(conn)

        # Ordered (skip option, phase) pairs. Unreferenced content is swept
        # twice: once up front and once more after the duplicate and alias
        # phases have run. Each phase issues its own commits as appropriate,
        # which keeps this script transaction friendly.
        phases = (
            ('skip_expiry', librariangc.expire_aliases),
            ('skip_content', librariangc.delete_unreferenced_content),
            ('skip_blobs', librariangc.delete_expired_blobs),
            ('skip_duplicates', librariangc.merge_duplicates),
            ('skip_aliases', librariangc.delete_unreferenced_aliases),
            ('skip_content', librariangc.delete_unreferenced_content),
            ('skip_files', librariangc.delete_unwanted_files),
        )
        for skip_option, run_phase in phases:
            if getattr(self.options, skip_option):
                continue
            run_phase(conn)
Example #2
0
    def test_delete_unwanted_files_follows_symlinks(self):
        # In production, our tree has symlinks in it now.  We need to be able
        # to cope.
        # First, let's make sure we have some trash.
        switch_dbuser('testadmin')
        content = 'foo'
        self.client.addFile(
            'foo.txt', len(content), StringIO(content), 'text/plain')
        # Roll back the database changes, leaving the file on disk.
        transaction.abort()

        switch_dbuser(config.librarian_gc.dbuser)

        # Now, we will move the directory containing the trash somewhere else
        # and make a symlink to it.
        original = os.path.join(config.librarian_server.root, '00', '00')
        newdir = tempfile.mkdtemp()
        alt = os.path.join(newdir, '00')
        try:
            shutil.move(original, alt)
            try:
                os.symlink(alt, original)

                # Now we will do our thing.  This is the actual test.  It
                # used to fail.
                librariangc.delete_unwanted_files(self.con)
            finally:
                # Put the real directory back even when the collector
                # raises, so a failure here cannot leave the storage tree
                # pointing into a temporary directory for later tests.
                # The symlink only exists if os.symlink() succeeded.
                if os.path.islink(original):
                    os.remove(original)
                shutil.move(alt, original)
        finally:
            # Always discard the temporary directory.
            shutil.rmtree(newdir)
    def main(self):
        """Run the librarian garbage collection phases that are enabled.

        Hands this script's logger to ``librariangc``, switches on its
        debug flag when the configured log level is DEBUG or lower, and
        then runs each phase whose ``skip_*`` option is not set, always in
        the same order.
        """
        librariangc.log = self.logger
        if self.options.loglevel <= logging.DEBUG:
            librariangc.debug = True

        # XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection
        # here is wrong. We should either create our own or use
        # Store.execute or cursor() and the transaction module.
        store = IStore(LibraryFileAlias)
        conn = store._connection._raw_connection

        # Refuse to run if we have significant clock skew between the
        # librarian and the database.
        librariangc.confirm_no_clock_skew(conn)

        # Ordered (skip option, phase) pairs. Unreferenced content is swept
        # twice: once up front and once more after the duplicate and alias
        # phases have run. Each phase issues its own commits as appropriate,
        # which keeps this script transaction friendly.
        phases = (
            ('skip_expiry', librariangc.expire_aliases),
            ('skip_content', librariangc.delete_unreferenced_content),
            ('skip_blobs', librariangc.delete_expired_blobs),
            ('skip_duplicates', librariangc.merge_duplicates),
            ('skip_aliases', librariangc.delete_unreferenced_aliases),
            ('skip_content', librariangc.delete_unreferenced_content),
            ('skip_files', librariangc.delete_unwanted_files),
        )
        for skip_option, run_phase in phases:
            if getattr(self.options, skip_option):
                continue
            run_phase(conn)
Example #4
0
    def test_deleteUnwantedFilesIgnoresNoise(self):
        # Directories with invalid names in the storage area are
        # ignored. They are reported as warnings though.

        # Not a hexadecimal number.
        noisedir1_path = os.path.join(config.librarian_server.root, 'zz')

        # Too long
        noisedir2_path = os.path.join(config.librarian_server.root, '111')

        # Long non-hexadecimal number
        noisedir3_path = os.path.join(config.librarian_server.root, '11.bak')

        noisedir_paths = (noisedir1_path, noisedir2_path, noisedir3_path)

        try:
            for noisedir_path in noisedir_paths:
                os.mkdir(noisedir_path)

            # Files in the noise directories.
            # NOTE(review): the third file lives in noisedir2, so noisedir3
            # stays empty. That looks like a copy/paste slip, but it is
            # kept as-is here -- confirm the intent before changing it.
            noisefile1_path = os.path.join(noisedir1_path, 'abc')
            noisefile2_path = os.path.join(noisedir2_path, 'def')
            noisefile3_path = os.path.join(noisedir2_path, 'ghi')
            noisefiles = (
                (noisefile1_path, 'hello'),
                (noisefile2_path, 'there'),
                (noisefile3_path, 'testsuite'),
            )
            for noisefile_path, noise in noisefiles:
                # Close each file explicitly so the content is flushed
                # before the garbage collector walks the tree.
                with open(noisefile_path, 'w') as noisefile:
                    noisefile.write(noise)

            # Pretend it is tomorrow to ensure the files don't count as
            # recently created, and run the delete_unwanted_files process.
            org_time = librariangc.time

            def tomorrow_time():
                return org_time() + 24 * 60 * 60 + 1

            try:
                librariangc.time = tomorrow_time
                librariangc.delete_unwanted_files(self.con)
            finally:
                librariangc.time = org_time

            # None of the rubbish we created has been touched.
            for noisedir_path in noisedir_paths:
                self.assertTrue(os.path.isdir(noisedir_path))
            for noisefile_path, _noise in noisefiles:
                self.assertTrue(os.path.exists(noisefile_path))
        finally:
            # We need to clean this up ourselves, as the standard librarian
            # cleanup only removes files it knows were valid to avoid
            # accidents. Only remove what actually exists, so that a
            # failure part way through set-up is not masked by a second
            # error raised from here.
            for noisedir_path in noisedir_paths:
                if os.path.isdir(noisedir_path):
                    shutil.rmtree(noisedir_path)

        # Can't check the ordering, so we'll just check that one of the
        # warnings is there.
        self.assertIn(
            "WARNING Ignoring invalid directory zz",
            librariangc.log.getLogBuffer())
Example #5
0
    def test_deleteUnwantedFiles(self):
        self.ztm.begin()
        cur = cursor()

        # We may find files in the LibraryFileContent repository
        # that do not have a corresponding LibraryFileContent row.

        # Find a content_id we can easily delete and do so. This row is
        # removed from the database, leaving an orphaned file on the
        # filesystem that should be removed.
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = content
            WHERE LibraryFileAlias.id IS NULL
            LIMIT 1
            """)
        row = cur.fetchone()
        # Fail with a clear message instead of a TypeError from indexing
        # None if the sample data has no unreferenced content.
        self.assertIsNot(
            None, row, "No unreferenced LibraryFileContent row to delete")
        content_id = row[0]
        cur.execute("""
                DELETE FROM LibraryFileContent WHERE id=%s
                """, (content_id,))
        self.ztm.commit()

        path = librariangc.get_file_path(content_id)
        self.assertTrue(os.path.exists(path))

        # Ensure delete_unwanted_files does not remove the file, because
        # it will have just been created (has a recent date_created). There
        # is a window between file creation and the garbage collector
        # bothering to remove the file to avoid the race condition where the
        # garbage collector is run whilst a file is being uploaded.
        librariangc.delete_unwanted_files(self.con)
        self.assertTrue(os.path.exists(path))

        # To test removal does occur when we want it to, we need to trick
        # the garbage collector into thinking it is tomorrow.
        org_time = librariangc.time

        def tomorrow_time():
            return org_time() + 24 * 60 * 60 + 1

        try:
            librariangc.time = tomorrow_time
            librariangc.delete_unwanted_files(self.con)
        finally:
            librariangc.time = org_time

        self.assertFalse(os.path.exists(path))

        # Make sure nothing else has been removed from disk. Separate names
        # are used so the id and path under test above are not shadowed.
        self.ztm.begin()
        cur = cursor()
        cur.execute("""
                SELECT id FROM LibraryFileContent
                """)
        for remaining_row in cur.fetchall():
            remaining_path = librariangc.get_file_path(remaining_row[0])
            self.assertTrue(os.path.exists(remaining_path))
Example #6
0
    def test_delete_unwanted_files_bug437084(self):
        # Regression test: delete_unwanted_files() used to die when the
        # last file found on disk was an unwanted one.
        switch_dbuser('testadmin')
        payload = 'foo'
        payload_size = len(payload)
        self.client.addFile(
            'foo.txt', payload_size, StringIO(payload), 'text/plain')
        # Abort the transaction: the database changes are rolled back but
        # the uploaded file stays on disk.
        transaction.abort()

        # Carry on as the database user configured for librarian_gc.
        gc_dbuser = config.librarian_gc.dbuser
        switch_dbuser(gc_dbuser)

        # The collector has to get through this without raising.
        librariangc.delete_unwanted_files(self.con)