def test_checkGzipEncoding(self):
    # Files that end in ".txt.gz" are treated specially and are
    # returned with an encoding of "gzip" or "x-gzip" to accommodate
    # requirements of displaying Ubuntu build logs in the browser. The
    # mimetype should be "text/plain" for these files.
    client = LibrarianClient()
    contents = u'Build log \N{SNOWMAN}...'.encode('UTF-8')
    build_log = BytesIO()
    with GzipFile(mode='wb', fileobj=build_log) as f:
        f.write(contents)
    build_log.seek(0)
    alias_id = client.addFile(
        name="build_log.txt.gz", size=len(build_log.getvalue()),
        file=build_log, contentType="text/plain")
    self.commit()
    url = client.getURLForAlias(alias_id)
    response = requests.get(url)
    response.raise_for_status()
    mimetype = response.headers['content-type']
    encoding = response.headers['content-encoding']
    self.assertEqual(
        "text/plain; charset=utf-8", mimetype,
        "Wrong mimetype. %s != 'text/plain; charset=utf-8'." % mimetype)
    self.assertEqual(
        "gzip", encoding, "Wrong encoding. %s != 'gzip'." % encoding)
    self.assertEqual(contents.decode('UTF-8'), response.text)
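# The gzip rule asserted above can be restated compactly. The following
# is a minimal illustrative sketch of the header-selection behaviour the
# test expects; guess_headers is a hypothetical name, not the
# Librarian's actual implementation.
def guess_headers(filename, stored_mimetype):
    # ".txt.gz" build logs are served as text with a gzip
    # Content-Encoding so browsers decompress and display them inline.
    if filename.endswith('.txt.gz'):
        return {'Content-Type': 'text/plain; charset=utf-8',
                'Content-Encoding': 'gzip'}
    # Everything else keeps its stored mimetype and gets no
    # Content-Encoding header (see test_checkNoEncoding below).
    return {'Content-Type': stored_mimetype}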
def test_missing_storage(self):
    # When a file exists in the DB but is missing from disk, a 404
    # is just confusing. It's an internal error, so 500 instead.
    client = LibrarianClient()
    # Upload a file so we can retrieve it.
    sample_data = b'blah'
    file_alias_id = client.addFile(
        'sample', len(sample_data), BytesIO(sample_data),
        contentType='text/plain')
    url = client.getURLForAlias(file_alias_id)
    # Look up the alias so we can find its content file on disk later.
    file_alias = IMasterStore(LibraryFileAlias).get(
        LibraryFileAlias, file_alias_id)
    # Commit so the file is available from the Librarian.
    self.commit()
    # Fetch the file via HTTP.
    response = requests.get(url)
    response.raise_for_status()
    # Delete the on-disk file.
    storage = LibrarianStorage(config.librarian_server.root, None)
    os.remove(storage._fileLocation(file_alias.contentID))
    # The URL now 500s, since the DB says it should exist.
    response = requests.get(url)
    self.assertEqual(500, response.status_code)
    self.assertIn('Server', response.headers)
    self.assertNotIn('Last-Modified', response.headers)
    self.assertNotIn('Cache-Control', response.headers)
def test__getURLForDownload(self):
    # This protected method is used by getFileByAlias. It is supposed
    # to use the internal host and port rather than the external,
    # proxied host and port. This is to provide relief for our own
    # issues with the problems reported in bug 317482.
    #
    # (Set up:)
    client = LibrarianClient()
    alias_id = client.addFile(
        'sample.txt', 6, StringIO('sample'), 'text/plain')
    config.push(
        'test config',
        textwrap.dedent('''\
            [librarian]
            download_host: example.org
            download_port: 1234
            '''))
    try:
        # (Test:)
        # The LibrarianClient should use the download_host and
        # download_port.
        expected_host = 'http://example.org:1234/'
        download_url = client._getURLForDownload(alias_id)
        self.assertTrue(
            download_url.startswith(expected_host),
            'expected %s to start with %s' % (
                download_url, expected_host))
        # If the alias has been deleted, _getURLForDownload returns
        # None.
        lfa = LibraryFileAlias.get(alias_id)
        lfa.content = None
        call = block_implicit_flushes(  # Prevent a ProgrammingError
            LibrarianClient._getURLForDownload)
        self.assertEqual(call(client, alias_id), None)
    finally:
        # (Tear down:)
        config.pop('test config')
def testUploadsSucceed(self):
    # This layer is able to be used on its own as it depends on
    # DatabaseLayer.
    # We can test this using remoteAddFile (it does not need the CA
    # loaded).
    client = LibrarianClient()
    data = 'This is a test'
    client.remoteAddFile(
        'foo.txt', len(data), StringIO(data), 'text/plain')
def test_duplicateuploads(self):
    client = LibrarianClient()
    filename = 'sample.txt'
    id1 = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
    id2 = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
    self.assertNotEqual(id1, id2, 'Got allocated the same id!')
    self.commit()
    self.assertEqual(client.getFileByAlias(id1).read(), b'sample')
    self.assertEqual(client.getFileByAlias(id2).read(), b'sample')
def test_addFile_uses_master(self):
    # addFile is a write operation, so it should always use the
    # master store, even if the slave is the default. Close the
    # slave store and try to add a file, verifying that the master
    # is used.
    client = LibrarianClient()
    ISlaveStore(LibraryFileAlias).close()
    with SlaveDatabasePolicy():
        alias_id = client.addFile(
            'sample.txt', 6, StringIO('sample'), 'text/plain')
    transaction.commit()
    f = client.getFileByAlias(alias_id)
    self.assertEqual(f.read(), 'sample')
def test_getURLForAliasObject(self):
    # getURLForAliasObject returns the same URL as getURLForAlias.
    client = LibrarianClient()
    content = b"Test content"
    alias_id = client.addFile(
        'test.txt', len(content), BytesIO(content),
        contentType='text/plain')
    self.commit()
    alias = getUtility(ILibraryFileAliasSet)[alias_id]
    self.assertEqual(
        client.getURLForAlias(alias_id),
        client.getURLForAliasObject(alias))
def test_clientWrongDatabase(self):
    # If the client is using the wrong database, the server should
    # refuse the upload, causing LibrarianClient to raise
    # UploadFailed.
    client = LibrarianClient()
    # Force the client to mis-report its database.
    client._getDatabaseName = lambda cur: 'wrong_database'
    try:
        client.addFile('sample.txt', 6, StringIO('sample'), 'text/plain')
    except UploadFailed as e:
        msg = e.args[0]
        self.assertTrue(
            msg.startswith('Server said: 400 Wrong database'),
            'Unexpected UploadFailed error: ' + msg)
    else:
        self.fail("UploadFailed not raised")
def test_addFile_hashes(self):
    # addFile() sets the MD5, SHA-1 and SHA-256 hashes on the
    # LibraryFileContent record.
    data = 'i am some data'
    md5 = hashlib.md5(data).hexdigest()
    sha1 = hashlib.sha1(data).hexdigest()
    sha256 = hashlib.sha256(data).hexdigest()
    client = LibrarianClient()
    lfa = LibraryFileAlias.get(
        client.addFile('file', len(data), StringIO(data), 'text/plain'))
    self.assertEqual(md5, lfa.content.md5)
    self.assertEqual(sha1, lfa.content.sha1)
    self.assertEqual(sha256, lfa.content.sha256)
def __init__(self, log, config, diskpool, archive,
             allowed_suites=None, library=None):
    """Initialize a publisher.

    Publishers need the pool root dir and a DiskPool object.

    Optionally we can pass a list of (distroseries.name, pocket)
    tuples, which will restrict the publisher's actions: only suites
    listed in allowed_suites will be modified.
    """
    self.log = log
    self._config = config
    self.distro = archive.distribution
    self.archive = archive
    self.allowed_suites = allowed_suites
    self._diskpool = diskpool
    if library is None:
        self._library = LibrarianClient()
    else:
        self._library = library
    # Track which distroseries pockets have been dirtied by a
    # change, and therefore need domination/apt-ftparchive work.
    # This is a set of tuples in the form (distroseries.name, pocket).
    self.dirty_pockets = set()
    # Track which pockets need release files. This will contain more
    # than dirty_pockets in the case of a careful index run.
    # This is a set of tuples in the form (distroseries.name, pocket).
    self.release_files_needed = set()
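# To make the bookkeeping above concrete, here is a hypothetical helper
# showing how a publisher method might consult allowed_suites before
# dirtying a pocket. markPocketDirty is an illustrative sketch of the
# contract of the two sets, not an actual Publisher method.
def markPocketDirty(self, distroseries, pocket):
    suite = (distroseries.name, pocket)
    # Respect the allowed_suites restriction, if one was given.
    if self.allowed_suites and suite not in self.allowed_suites:
        self.log.debug("Skipping %s/%s: not in allowed_suites" % suite)
        return
    # Record that this suite needs domination/apt-ftparchive work and
    # a fresh Release file.
    self.dirty_pockets.add(suite)
    self.release_files_needed.add(suite)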
def test_restricted_subdomain_must_match_file_alias(self):
    # IFF there is a .restricted. in the host, then the library file
    # alias in the subdomain must match that in the path.
    client = LibrarianClient()
    fileAlias = client.addFile(
        'sample', 12, BytesIO(b'a' * 12), contentType='text/plain')
    fileAlias2 = client.addFile(
        'sample', 12, BytesIO(b'b' * 12), contentType='text/plain')
    self.commit()
    url = client.getURLForAlias(fileAlias)
    download_host = urlparse(config.librarian.download_url)[1]
    if ':' in download_host:
        download_host = download_host[:download_host.find(':')]
    template_host = 'i%%d.restricted.%s' % download_host
    path = get_libraryfilealias_download_path(fileAlias, 'sample')
    # The basic URL must work.
    response = requests.get(url)
    response.raise_for_status()
    # Use the network level protocol because DNS resolution won't
    # work here (no wildcard support).
    connection = httplib.HTTPConnection(
        config.librarian.download_host,
        config.librarian.download_port)
    # A valid subdomain based URL must work.
    good_host = template_host % fileAlias
    connection.request("GET", path, headers={'Host': good_host})
    response = connection.getresponse()
    response.read()
    self.assertEqual(200, response.status, response)
    # A subdomain based URL trying to put fileAlias into the
    # restricted domain of fileAlias2 must not work.
    hostile_host = template_host % fileAlias2
    connection.request("GET", path, headers={'Host': hostile_host})
    response = connection.getresponse()
    response.read()
    self.assertEqual(404, response.status)
    # A subdomain which matches the LFA but is nested under one that
    # doesn't is also treated as hostile.
    nested_host = 'i%d.restricted.i%d.restricted.%s' % (
        fileAlias, fileAlias2, download_host)
    connection.request("GET", path, headers={'Host': nested_host})
    response = connection.getresponse()
    response.read()
    self.assertEqual(404, response.status)
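# The Host-header rule the test above exercises can be restated on its
# own. This is an illustrative reconstruction of the check, assuming a
# host of the form "i<ID>.restricted.<download_host>"; it is not the
# Librarian's actual code.
import re

_RESTRICTED_HOST = re.compile(r'^i(\d+)\.restricted\.(.+)$')

def host_permits_alias(host, download_host, alias_id):
    match = _RESTRICTED_HOST.match(host)
    if match is None:
        # Unrestricted host: no subdomain check applies.
        return True
    # The remainder must be exactly the public download host, so a
    # nested "iN.restricted." prefix fails here.
    if match.group(2) != download_host:
        return False
    # The alias named in the subdomain must match the one requested.
    return int(match.group(1)) == alias_id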
def test_librarian_is_reset(self):
    # Add a file. We use remoteAddFile because it does not need the
    # CA loaded to work.
    client = LibrarianClient()
    LibrarianTestCase.url = client.remoteAddFile(
        self.sample_data, len(self.sample_data),
        StringIO(self.sample_data), 'text/plain')
    self.assertEqual(
        urlopen(LibrarianTestCase.url).read(), self.sample_data)
    # Perform the librarian specific between-test code:
    LibrarianLayer.testTearDown()
    LibrarianLayer.testSetUp()
    # Which should have nuked the old file.
    # XXX: StuartBishop 2006-06-30 Bug=51370:
    # We should get a DownloadFailed exception here.
    data = urlopen(LibrarianTestCase.url).read()
    self.assertNotEqual(data, self.sample_data)
def testHideLibrarian(self):
    # First perform a successful upload:
    client = LibrarianClient()
    data = 'foo'
    client.remoteAddFile('foo', len(data), StringIO(data), 'text/plain')
    # The database was committed to, but not by this process, so we
    # need to ensure that it is fully torn down and recreated.
    DatabaseLayer.force_dirty_database()
    # Hide the librarian, and show that the upload fails:
    LibrarianLayer.hide()
    self.assertRaises(
        UploadFailed, client.remoteAddFile,
        'foo', len(data), StringIO(data), 'text/plain')
    # Reveal the librarian again, allowing uploads:
    LibrarianLayer.reveal()
    client.remoteAddFile('foo', len(data), StringIO(data), 'text/plain')
def test_404(self):
    client = LibrarianClient()
    filename = 'sample.txt'
    aid = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
    self.commit()
    url = client.getURLForAlias(aid)
    response = requests.get(url)
    response.raise_for_status()
    self.assertEqual(response.content, b'sample')

    # Change the alias id and assert we get a 404.
    self.assertIn(str(aid), url)
    bad_id_url = uri_path_replace(url, str(aid), str(aid + 1))
    self.require404(bad_id_url)

    # Change the filename and assert we get a 404.
    self.assertIn(filename, url)
    bad_name_url = uri_path_replace(url, filename, 'different.txt')
    self.require404(bad_name_url)
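# These tests lean on a uri_path_replace helper imported from the test
# utilities rather than defined here. A minimal sketch of the behaviour
# they assume (substitution confined to the URL's path component); the
# implementation details are an assumption.
try:
    from urllib.parse import urlsplit, urlunsplit
except ImportError:  # Python 2
    from urlparse import urlsplit, urlunsplit

def uri_path_replace(url, old, new):
    # Replace the first occurrence of `old` with `new` in the path
    # only, so a host name containing `old` is left untouched.
    scheme, netloc, path, query, fragment = urlsplit(url)
    return urlunsplit(
        (scheme, netloc, path.replace(old, new, 1), query, fragment))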
def test_checkNoEncoding(self):
    # Other files should have no encoding.
    client = LibrarianClient()
    contents = b'Build log...'
    build_log = BytesIO(contents)
    alias_id = client.addFile(
        name="build_log.tgz", size=len(contents),
        file=build_log, contentType="application/x-tar")
    self.commit()
    url = client.getURLForAlias(alias_id)
    response = requests.get(url)
    response.raise_for_status()
    mimetype = response.headers['content-type']
    self.assertNotIn('content-encoding', response.headers)
    self.assertEqual(
        "application/x-tar", mimetype,
        "Wrong mimetype. %s != 'application/x-tar'." % mimetype)
def test_headers(self):
    client = LibrarianClient()
    # Upload a file so we can retrieve it.
    sample_data = b'blah'
    file_alias_id = client.addFile(
        'sample', len(sample_data), BytesIO(sample_data),
        contentType='text/plain')
    url = client.getURLForAlias(file_alias_id)
    # Change the date_created to a known value that doesn't match
    # the disk timestamp. The timestamp on disk cannot be trusted.
    file_alias = IMasterStore(LibraryFileAlias).get(
        LibraryFileAlias, file_alias_id)
    file_alias.date_created = datetime(
        2001, 1, 30, 13, 45, 59, tzinfo=pytz.utc)
    # Commit so the file is available from the Librarian.
    self.commit()
    # Fetch the file via HTTP, recording the interesting headers.
    response = requests.get(url)
    response.raise_for_status()
    last_modified_header = response.headers['Last-Modified']
    cache_control_header = response.headers['Cache-Control']
    # URLs point to the same content for ever, so we have a hardcoded
    # 1 year max-age cache policy.
    self.assertEqual(cache_control_header, 'max-age=31536000, public')
    # And we should have a correct Last-Modified header too.
    self.assertEqual(
        last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
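# For reference, the max-age asserted above is exactly one non-leap
# year in seconds.
assert 365 * 24 * 60 * 60 == 31536000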
def test_oldurl(self):
    # 'old' URLs are in the form http://server:port/cid/aid/fname,
    # which we want to continue supporting. The content id is simply
    # ignored.
    client = LibrarianClient()
    filename = 'sample.txt'
    aid = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
    self.commit()
    url = client.getURLForAlias(aid)
    response = requests.get(url)
    response.raise_for_status()
    self.assertEqual(response.content, b'sample')

    old_url = uri_path_replace(url, str(aid), '42/%d' % aid)
    response = requests.get(old_url)
    response.raise_for_status()
    self.assertEqual(response.content, b'sample')

    # If the content and alias IDs are not integers, we get a 404.
    old_url = uri_path_replace(url, str(aid), 'foo/%d' % aid)
    self.require404(old_url)
    old_url = uri_path_replace(url, str(aid), '%d/foo' % aid)
    self.require404(old_url)
def get_restricted_file_and_public_url(self, filename='sample'):
    # Use a regular LibrarianClient to ensure we speak to the
    # nonrestricted port on the librarian, which is where secured
    # restricted files are served from.
    client = LibrarianClient()
    fileAlias = client.addFile(
        filename, 12, BytesIO(b'a' * 12), contentType='text/plain')
    # Note: We're deliberately using the wrong URL here: we should be
    # passing secure=True to getURLForAlias, but to use the returned
    # URL we would need a wildcard DNS facility patched into requests;
    # instead we rely on the *deliberate* choice of having the path of
    # secure and insecure URLs be the same, so that we can test it:
    # the server code doesn't need to know about the fancy wildcard
    # domains.
    url = client.getURLForAlias(fileAlias)
    # Now that we have a URL which talks to the public librarian,
    # make the file restricted.
    IMasterStore(LibraryFileAlias).find(
        LibraryFileAlias,
        LibraryFileAlias.id == fileAlias).set(restricted=True)
    self.commit()
    return fileAlias, url
def test_uploadThenDownload(self):
    client = LibrarianClient()
    # Do this 10 times, to try to make sure we get all the threads in
    # the thread pool involved more than once, in case handling the
    # second request is an issue...
    for count in range(10):
        # Upload a file. This should work without any exceptions
        # being thrown.
        sampleData = b'x' + (b'blah' * (count % 5))
        fileAlias = client.addFile(
            'sample', len(sampleData), BytesIO(sampleData),
            contentType='text/plain')
        # Make sure we can get its URL.
        url = client.getURLForAlias(fileAlias)
        # However, we can't access it until we have committed,
        # because the server has no idea what mime-type to send it as
        # (NB. This could be worked around if necessary by having the
        # librarian allow access to files that don't exist in the DB
        # and spitting them out with an 'unknown' mime-type
        # -- StuartBishop)
        self.require404(url)
        self.commit()
        # Make sure we can download it using the API.
        fileObj = client.getFileByAlias(fileAlias)
        self.assertEqual(sampleData, fileObj.read())
        fileObj.close()
        # And make sure the URL works too.
        response = requests.get(url)
        response.raise_for_status()
        self.assertEqual(sampleData, response.content)
def testLibrarianWorking(self):
    # Check that the librarian is actually working. This means at
    # a minimum the Librarian service is running and is connected
    # to the Launchpad database.
    want_librarian_working = (
        self.want_librarian_running and self.want_launchpad_database
        and self.want_component_architecture)
    client = LibrarianClient()
    data = 'Whatever'
    try:
        client.addFile(
            'foo.txt', len(data), StringIO(data), 'text/plain')
    except UploadFailed:
        self.assertFalse(
            want_librarian_working,
            'Librarian should be fully operational')
    # Since we use IMasterStore, this throws neither AttributeError
    # nor ComponentLookupError, but TypeError.
    except TypeError:
        self.assertFalse(
            want_librarian_working,
            'Librarian not operational as component architecture '
            'not loaded')
    else:
        self.assertTrue(
            want_librarian_working,
            'Librarian should not be operational')
class TestLibrarianDBOutage(TestCase):
    layer = PGBouncerLibrarianLayer

    def setUp(self):
        super(TestLibrarianDBOutage, self).setUp()
        self.pgbouncer = PGBouncerLibrarianLayer.pgbouncer_fixture
        self.client = LibrarianClient()

        # Add a file to the Librarian so we can download it.
        self.url = self._makeLibraryFileUrl()

    def _makeLibraryFileUrl(self):
        data = 'whatever'
        return self.client.remoteAddFile(
            'foo.txt', len(data), StringIO(data), 'text/plain')

    def getErrorCode(self):
        # We need to talk to every Librarian thread to ensure all the
        # Librarian database connections are in a known state.
        # XXX StuartBishop 2011-09-01 bug=840046: 20 might be overkill
        # for the test run, but we have no real way of knowing how
        # many connections are in use.
        num_librarian_threads = 20
        codes = set()
        for count in range(num_librarian_threads):
            try:
                urllib2.urlopen(self.url).read()
                codes.add(200)
            except urllib2.HTTPError as error:
                codes.add(error.code)
        self.assertEqual(
            len(codes), 1, 'Mixed responses: %s' % str(codes))
        return codes.pop()

    def test_outage(self):
        # Everything should be working fine to start with.
        self.assertEqual(self.getErrorCode(), 200)

        # When the outage kicks in, we start getting 503 responses
        # instead of 200 and 404s.
        self.pgbouncer.stop()
        self.assertEqual(self.getErrorCode(), 503)

        # When the outage is over, things are back to normal.
        self.pgbouncer.start()
        self.assertEqual(self.getErrorCode(), 200)
class TestLibrarianGarbageCollection(TestCase):
    layer = LaunchpadZopelessLayer

    def setUp(self):
        super(TestLibrarianGarbageCollection, self).setUp()
        self.client = LibrarianClient()
        self.patch(librariangc, 'log', BufferLogger())

        # A value we use in a number of tests. This represents the
        # stay of execution hard coded into the garbage collector.
        # We don't destroy any data unless it has been waiting to be
        # destroyed for longer than this period. We pick a value
        # that is close enough to the stay of execution so that
        # forgetting timezone information will break things, but
        # far enough so that how long it takes the test to run
        # is not an issue. 'stay_of_execution - 1 hour' fits these
        # criteria.
        self.recent_past = utc_now() - timedelta(days=6, hours=23)
        # A time beyond the stay of execution.
        self.ancient_past = utc_now() - timedelta(days=30)

        self.f1_id, self.f2_id = self._makeDupes()

        switch_dbuser(config.librarian_gc.dbuser)
        self.ztm = self.layer.txn

        # Make sure the files exist. We do this in setup, because we
        # need to use the get_file_path method later in the setup and
        # we want to be sure it is working correctly.
        path = librariangc.get_file_path(self.f1_id)
        self.assertTrue(os.path.exists(path), "Librarian uploads failed")

        # Make sure that every file the database knows about exists
        # on disk. We manually remove them for tests that need to
        # cope with missing library items.
        self.ztm.begin()
        cur = cursor()
        cur.execute("SELECT id FROM LibraryFileContent")
        for content_id in (row[0] for row in cur.fetchall()):
            path = librariangc.get_file_path(content_id)
            if not os.path.exists(path):
                if not os.path.exists(os.path.dirname(path)):
                    os.makedirs(os.path.dirname(path))
                with open(path, 'w') as f:
                    f.write('whatever')
        self.ztm.abort()

        self.con = connect(
            user=config.librarian_gc.dbuser,
            isolation=ISOLATION_LEVEL_AUTOCOMMIT)

    def tearDown(self):
        self.con.rollback()
        self.con.close()
        del self.con
        super(TestLibrarianGarbageCollection, self).tearDown()

    def _makeDupes(self):
        """Create two duplicate LibraryFileContent entries with one
        LibraryFileAlias each.

        Return the two LibraryFileAlias ids as a tuple.
        """
        # Connect to the database as a user with file upload
        # privileges, in this case the PostgreSQL default user who
        # happens to be an administrator on launchpad development
        # boxes.
        switch_dbuser('testadmin')
        ztm = self.layer.txn
        ztm.begin()

        # Add some duplicate files.
        content = 'This is some content'
        f1_id = self.client.addFile(
            'foo.txt', len(content), StringIO(content), 'text/plain')
        f1 = LibraryFileAlias.get(f1_id)
        f2_id = self.client.addFile(
            'foo.txt', len(content), StringIO(content), 'text/plain')
        f2 = LibraryFileAlias.get(f2_id)

        # Make sure the duplicates really are distinct.
        self.assertNotEqual(f1_id, f2_id)
        self.assertNotEqual(f1.contentID, f2.contentID)

        f1.date_created = self.ancient_past
        f2.date_created = self.ancient_past
        f1.content.datecreated = self.ancient_past
        f2.content.datecreated = self.ancient_past

        del f1, f2
        ztm.commit()
        return f1_id, f2_id

    def test_MergeDuplicates(self):
        # Merge the duplicates.
        librariangc.merge_duplicates(self.con)

        # merge_duplicates should have committed.
        self.ztm.begin()
        self.ztm.abort()

        # Confirm that the duplicates have been merged.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assertEqual(f1.contentID, f2.contentID)

    def test_DeleteUnreferencedAliases(self):
        self.ztm.begin()

        # Confirm that our sample files are there.
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        # Grab the content IDs related to these unreferenced
        # LibraryFileAliases.
        c1_id = f1.contentID
        c2_id = f2.contentID
        del f1, f2
        self.ztm.abort()

        # Delete unreferenced aliases.
        librariangc.delete_unreferenced_aliases(self.con)

        # This should have committed.
        self.ztm.begin()

        # Confirm that the LibraryFileContents are still there.
        LibraryFileContent.get(c1_id)
        LibraryFileContent.get(c2_id)

        # But the LibraryFileAliases should be gone.
        self.assertRaises(
            SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(
            SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

    def test_DeleteUnreferencedAliases2(self):
        # Don't delete LibraryFileAliases accessed recently.

        # Merge the duplicates. Both our aliases now point to the
        # same LibraryFileContent.
        librariangc.merge_duplicates(self.con)

        # We now have two aliases sharing the same content.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assertEqual(f1.content, f2.content)

        # Flag one of our LibraryFileAliases as being recently
        # created.
        f1.date_created = self.recent_past
        del f1
        del f2
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should remove
        # the alias with the ID self.f2_id, but the other should
        # stay, as it was accessed recently.
        librariangc.delete_unreferenced_aliases(self.con)

        self.ztm.begin()
        LibraryFileAlias.get(self.f1_id)
        self.assertRaises(
            SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

    def test_DeleteUnreferencedAndWellExpiredAliases(self):
        # LibraryFileAliases can be removed after they have expired.

        # Merge the duplicates. Both our aliases now point to the
        # same LibraryFileContent.
        librariangc.merge_duplicates(self.con)

        # Flag one of our LibraryFileAliases with an expiry date in
        # the past.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.ancient_past
        del f1
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should remove
        # our example aliases, as one is unreferenced with a NULL
        # expiry and the other is unreferenced with an expiry in the
        # past.
        librariangc.delete_unreferenced_aliases(self.con)

        # Make sure both our example files are gone.
        self.ztm.begin()
        self.assertRaises(
            SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(
            SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

    def test_DontDeleteUnreferencedButNotExpiredAliases(self):
        # LibraryFileAliases can be removed only after they have
        # expired. If an explicit expiry is set and is in the recent
        # past (currently up to one week ago), the files hang around.

        # Merge the duplicates. Both our aliases now point to the
        # same LibraryFileContent.
        librariangc.merge_duplicates(self.con)

        # Flag one of our LibraryFileAliases with an expiry date in
        # the recent past.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.recent_past
        del f1
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should not
        # remove our example aliases, as one is unreferenced with a
        # NULL expiry and the other is unreferenced with an expiry in
        # the recent past.
        librariangc.delete_unreferenced_aliases(self.con)

        # Make sure both our example files are still there.
        self.ztm.begin()
        # Our recently expired LibraryFileAlias is still available.
        LibraryFileAlias.get(self.f1_id)

    def test_deleteWellExpiredAliases(self):
        # LibraryFileAlias records that are expired are unlinked from
        # their content.
        # Flag one of our LibraryFileAliases with an expiry date in
        # the past.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.ancient_past
        del f1
        self.ztm.commit()

        # Unlink expired LibraryFileAliases.
        librariangc.expire_aliases(self.con)

        self.ztm.begin()
        # Make sure the well expired f1 is still there, but has no
        # content.
        f1 = LibraryFileAlias.get(self.f1_id)
        self.assertIsNone(f1.content)
        # f2 should still have content, as it isn't flagged for
        # expiry.
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assertIsNotNone(f2.content)

    def test_ignoreRecentlyExpiredAliases(self):
        # LibraryFileAlias records that have expired recently are not
        # garbage collected.

        # Flag one of our LibraryFileAliases with an expiry date in
        # the recent past.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.recent_past  # Within stay of execution.
        del f1
        self.ztm.commit()

        # Unlink expired LibraryFileAliases.
        librariangc.expire_aliases(self.con)

        self.ztm.begin()
        # Make sure f1 is still there and has content. This ensures
        # that our stay of execution is still working.
        f1 = LibraryFileAlias.get(self.f1_id)
        self.assertIsNotNone(f1.content)
        # f2 should still have content, as it isn't flagged for
        # expiry.
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assertIsNotNone(f2.content)

    def test_DeleteUnreferencedContent(self):
        # Merge the duplicates. This creates an unreferenced
        # LibraryFileContent.
        librariangc.merge_duplicates(self.con)

        self.ztm.begin()

        # Locate the unreferenced LibraryFileContent.
        cur = cursor()
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.id IS NULL
                AND LibraryFileContent.id IN (%d, %d)
            """ % (self.f1_id, self.f2_id))
        results = cur.fetchall()
        self.assertEqual(len(results), 1)
        unreferenced_id = results[0][0]

        self.ztm.abort()

        # Make sure the file exists on disk.
        path = librariangc.get_file_path(unreferenced_id)
        self.assertTrue(os.path.exists(path))

        # Delete unreferenced content.
        librariangc.delete_unreferenced_content(self.con)

        # Make sure the file is gone.
        self.assertFalse(os.path.exists(path))

        # delete_unreferenced_content should have committed.
        self.ztm.begin()

        # Make sure the unreferenced entries have all gone.
        cur = cursor()
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.id IS NULL
            """)
        results = list(cur.fetchall())
        self.assertEqual(
            len(results), 0, 'Too many results %r' % (results,))

    def test_DeleteUnreferencedContent2(self):
        # Like test_DeleteUnreferencedContent, except that the file is
        # removed from disk before attempting to remove the
        # unreferenced LibraryFileContent.
        #
        # Because the garbage collector will remove an unreferenced
        # file from disk before it commits the database changes, it is
        # possible that the db removal will fail (eg. an exception was
        # raised on COMMIT), leaving the rows untouched in the
        # database but no file on disk. This is fine, as the next gc
        # run will attempt it again and nothing can use unreferenced
        # files anyway. This test ensures that this all works.

        # Merge the duplicates. This creates an unreferenced
        # LibraryFileContent.
        librariangc.merge_duplicates(self.con)

        self.ztm.begin()

        # Locate the unreferenced LibraryFileContent.
        cur = cursor()
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.id IS NULL
                AND LibraryFileContent.id IN (%d, %d)
            """ % (self.f1_id, self.f2_id))
        results = cur.fetchall()
        self.assertEqual(len(results), 1)
        unreferenced_id = results[0][0]

        self.ztm.abort()

        # Make sure the file exists on disk.
        path = librariangc.get_file_path(unreferenced_id)
        self.assertTrue(os.path.exists(path))

        # Remove the file from disk.
        os.unlink(path)
        self.assertFalse(os.path.exists(path))

        # Delete unreferenced content.
        librariangc.delete_unreferenced_content(self.con)

        # Make sure the file is gone.
        self.assertFalse(os.path.exists(path))

        # delete_unreferenced_content should have committed.
        self.ztm.begin()

        # Make sure the unreferenced entries have all gone.
        cur = cursor()
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.id IS NULL
            """)
        results = list(cur.fetchall())
        self.assertEqual(
            len(results), 0, 'Too many results %r' % (results,))

    def test_deleteUnwantedFiles(self):
        self.ztm.begin()
        cur = cursor()

        # We may find files in the LibraryFileContent repository
        # that do not have a corresponding LibraryFileContent row.

        # Find a content_id we can easily delete and do so. This row
        # is removed from the database, leaving an orphaned file on
        # the filesystem that should be removed.
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = content
            WHERE LibraryFileAlias.id IS NULL
            LIMIT 1
            """)
        content_id = cur.fetchone()[0]
        cur.execute("""
            DELETE FROM LibraryFileContent WHERE id=%s
            """, (content_id,))
        self.ztm.commit()

        path = librariangc.get_file_path(content_id)
        self.assertTrue(os.path.exists(path))

        # Ensure delete_unreferenced_files does not remove the file,
        # because it will have just been created (has a recent
        # date_created). There is a window between file creation and
        # the garbage collector bothering to remove the file, to
        # avoid the race condition where the garbage collector is run
        # whilst a file is being uploaded.
        librariangc.delete_unwanted_files(self.con)
        self.assertTrue(os.path.exists(path))

        # To test removal does occur when we want it to, we need to
        # trick the garbage collector into thinking it is tomorrow.
        org_time = librariangc.time

        def tomorrow_time():
            return org_time() + 24 * 60 * 60 + 1

        try:
            librariangc.time = tomorrow_time
            librariangc.delete_unwanted_files(self.con)
        finally:
            librariangc.time = org_time

        self.assertFalse(os.path.exists(path))

        # Make sure nothing else has been removed from disk.
        self.ztm.begin()
        cur = cursor()
        cur.execute("""
            SELECT id FROM LibraryFileContent
            """)
        for content_id in (row[0] for row in cur.fetchall()):
            path = librariangc.get_file_path(content_id)
            self.assertTrue(os.path.exists(path))

    def test_deleteUnwantedFilesIgnoresNoise(self):
        # Directories with invalid names in the storage area are
        # ignored. They are reported as warnings though.

        # Not a hexadecimal number.
        noisedir1_path = os.path.join(config.librarian_server.root, 'zz')
        # Too long.
        noisedir2_path = os.path.join(config.librarian_server.root, '111')
        # Long non-hexadecimal number.
        noisedir3_path = os.path.join(
            config.librarian_server.root, '11.bak')
        try:
            os.mkdir(noisedir1_path)
            os.mkdir(noisedir2_path)
            os.mkdir(noisedir3_path)

            # Files in the noise directories.
            noisefile1_path = os.path.join(noisedir1_path, 'abc')
            noisefile2_path = os.path.join(noisedir2_path, 'def')
            noisefile3_path = os.path.join(noisedir2_path, 'ghi')
            with open(noisefile1_path, 'w') as f:
                f.write('hello')
            with open(noisefile2_path, 'w') as f:
                f.write('there')
            with open(noisefile3_path, 'w') as f:
                f.write('testsuite')

            # Pretend it is tomorrow to ensure the files don't count
            # as recently created, and run the delete_unwanted_files
            # process.
            org_time = librariangc.time

            def tomorrow_time():
                return org_time() + 24 * 60 * 60 + 1

            try:
                librariangc.time = tomorrow_time
                librariangc.delete_unwanted_files(self.con)
            finally:
                librariangc.time = org_time

            # None of the rubbish we created has been touched.
            self.assertTrue(os.path.isdir(noisedir1_path))
            self.assertTrue(os.path.isdir(noisedir2_path))
            self.assertTrue(os.path.isdir(noisedir3_path))
            self.assertTrue(os.path.exists(noisefile1_path))
            self.assertTrue(os.path.exists(noisefile2_path))
            self.assertTrue(os.path.exists(noisefile3_path))
        finally:
            # We need to clean this up ourselves, as the standard
            # librarian cleanup only removes files it knows were
            # valid, to avoid accidents.
            shutil.rmtree(noisedir1_path)
            shutil.rmtree(noisedir2_path)
            shutil.rmtree(noisedir3_path)

        # Can't check the ordering, so we'll just check that one of
        # the warnings is there.
        self.assertIn(
            "WARNING Ignoring invalid directory zz",
            librariangc.log.getLogBuffer())

    def test_delete_unwanted_files_bug437084(self):
        # There was a bug where delete_unwanted_files() would die
        # if the last file found on disk was unwanted.
        switch_dbuser('testadmin')
        content = 'foo'
        self.client.addFile(
            'foo.txt', len(content), StringIO(content), 'text/plain')
        # Roll back the database changes, leaving the file on disk.
        transaction.abort()

        switch_dbuser(config.librarian_gc.dbuser)

        # This should cope.
        librariangc.delete_unwanted_files(self.con)

    def test_delete_unwanted_files_follows_symlinks(self):
        # In production, our tree has symlinks in it now. We need to
        # be able to cope.

        # First, let's make sure we have some trash.
        switch_dbuser('testadmin')
        content = 'foo'
        self.client.addFile(
            'foo.txt', len(content), StringIO(content), 'text/plain')
        # Roll back the database changes, leaving the file on disk.
        transaction.abort()

        switch_dbuser(config.librarian_gc.dbuser)

        # Now, we will move the directory containing the trash
        # somewhere else and make a symlink to it.
        original = os.path.join(config.librarian_server.root, '00', '00')
        newdir = tempfile.mkdtemp()
        alt = os.path.join(newdir, '00')
        shutil.move(original, alt)
        os.symlink(alt, original)

        # Now we will do our thing. This is the actual test. It used
        # to fail.
        librariangc.delete_unwanted_files(self.con)

        # Clean up.
        os.remove(original)
        shutil.move(alt, original)
        shutil.rmtree(newdir)

    def test_cronscript(self):
        script_path = os.path.join(
            config.root, 'cronscripts', 'librarian-gc.py')
        cmd = [sys.executable, script_path, '-q']
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT, stdin=PIPE)
        (script_output, _empty) = process.communicate()
        self.assertEqual(
            process.returncode, 0, 'Error: %s' % script_output)
        self.assertEqual(script_output, '')

        # Make sure that our example files have been garbage
        # collected.
        self.ztm.begin()
        self.assertRaises(
            SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(
            SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

        # And make sure stuff that *is* referenced remains.
        LibraryFileAlias.get(2)
        cur = cursor()
        cur.execute("SELECT count(*) FROM LibraryFileAlias")
        count = cur.fetchone()[0]
        self.assertNotEqual(count, 0)
        cur.execute("SELECT count(*) FROM LibraryFileContent")
        count = cur.fetchone()[0]
        self.assertNotEqual(count, 0)

    def test_confirm_no_clock_skew(self):
        # There should not be any clock skew when running the test
        # suite.
        librariangc.confirm_no_clock_skew(self.con)

        # To test that this function raises an exception when it
        # should, we trick the garbage collector into thinking it is
        # tomorrow.
        org_time = librariangc.time

        def tomorrow_time():
            return org_time() + 24 * 60 * 60 + 1

        try:
            librariangc.time = tomorrow_time
            self.assertRaises(
                Exception, librariangc.confirm_no_clock_skew, self.con)
        finally:
            librariangc.time = org_time
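# The "pretend it is tomorrow" monkeypatch appears three times in the
# class above. A hypothetical context manager could factor it out;
# librarian_tomorrow is an illustrative name, not an existing helper.
from contextlib import contextmanager

@contextmanager
def librarian_tomorrow(gc_module, skew=24 * 60 * 60 + 1):
    # Shift the gc module's notion of "now" forward by just over a
    # day, restoring the real clock on exit.
    org_time = gc_module.time
    gc_module.time = lambda: org_time() + skew
    try:
        yield
    finally:
        gc_module.time = org_time

# Usage would then read:
#     with librarian_tomorrow(librariangc):
#         librariangc.delete_unwanted_files(self.con)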
def _sendHeader(self, name, value):
    if name == 'Database-Name':
        self.sentDatabaseName = True
    return LibrarianClient._sendHeader(self, name, value)
def test_aliasNotFound(self):
    client = LibrarianClient()
    self.assertRaises(DownloadFailed, client.getURLForAlias, 99)
def _getURLForDownload(self, aliasID):
    self.called_getURLForDownload = True
    return LibrarianClient._getURLForDownload(self, aliasID)
class TestFeedSwift(TestCase):
    layer = LaunchpadZopelessLayer

    def setUp(self):
        super(TestFeedSwift, self).setUp()
        self.swift_fixture = self.useFixture(SwiftFixture())
        self.useFixture(FeatureFixture({'librarian.swift.enabled': True}))
        transaction.commit()
        self.addCleanup(swift.connection_pool.clear)

        # Restart the Librarian so it picks up the OS_* environment
        # variables.
        LibrarianLayer.librarian_fixture.killTac()
        LibrarianLayer.librarian_fixture.setUp()

        # Add some files. These common sample files all have their
        # modification times set to the past so they will not be
        # considered potential in-progress uploads.
        the_past = time.time() - 25 * 60 * 60
        self.librarian_client = LibrarianClient()
        self.contents = [str(i) * i for i in range(1, 5)]
        self.lfa_ids = [
            self.add_file('file_{0}'.format(i), content, when=the_past)
            for i, content in enumerate(self.contents)]
        self.lfas = [
            IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
            for lfa_id in self.lfa_ids]
        self.lfcs = [lfa.content for lfa in self.lfas]

    def tearDown(self):
        super(TestFeedSwift, self).tearDown()
        # Restart the Librarian so it picks up the feature flag
        # change.
        self.attachLibrarianLog(LibrarianLayer.librarian_fixture)
        LibrarianLayer.librarian_fixture.killTac()
        LibrarianLayer.librarian_fixture.setUp()

    @write_transaction
    def add_file(self, name, content, when=None,
                 content_type='text/plain'):
        lfa_id = self.librarian_client.addFile(
            name=name, size=len(content), file=StringIO(content),
            contentType=content_type)
        if when is None:
            when = 0  # Very very old.
        lfa = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
        path = swift.filesystem_path(lfa.content.id)
        os.utime(path, (when, when))
        return lfa_id

    def test_copy_to_swift(self):
        log = BufferLogger()

        # Confirm that files exist on disk where we expect to find
        # them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id)
            self.assertTrue(os.path.exists(path))

        # Copy all the files into Swift.
        swift.to_swift(log, remove_func=None)

        # Confirm that the files are still on disk where we expect to
        # find them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id)
            self.assertTrue(os.path.exists(path))

        # Confirm all the files are also in Swift.
        swift_client = self.swift_fixture.connect()
        for lfc, contents in zip(self.lfcs, self.contents):
            container, name = swift.swift_location(lfc.id)
            headers, obj = swift_client.get_object(container, name)
            self.assertEqual(contents, obj, 'Did not round trip')

        # Running again does nothing, in particular does not reupload
        # the files to Swift.
        con_patch = patch.object(
            swift.swiftclient.Connection, 'put_object',
            side_effect=AssertionError('do not call'))
        with con_patch:
            swift.to_swift(log)  # remove_func == None

    def test_copy_to_swift_and_rename(self):
        log = BufferLogger()

        # Confirm that files exist on disk where we expect to find
        # them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id)
            self.assertTrue(os.path.exists(path))

        # Copy all the files into Swift.
        swift.to_swift(log, remove_func=swift.rename)

        # Confirm that the files have been renamed on disk.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id) + '.migrated'
            self.assertTrue(os.path.exists(path))

        # Confirm all the files are also in Swift.
        swift_client = self.swift_fixture.connect()
        for lfc, contents in zip(self.lfcs, self.contents):
            container, name = swift.swift_location(lfc.id)
            headers, obj = swift_client.get_object(container, name)
            self.assertEqual(contents, obj, 'Did not round trip')

        # Running again does nothing, in particular does not reupload
        # the files to Swift.
        con_patch = patch.object(
            swift.swiftclient.Connection, 'put_object',
            side_effect=AssertionError('do not call'))
        with con_patch:
            swift.to_swift(log, remove_func=swift.rename)

    def test_move_to_swift(self):
        log = BufferLogger()

        # Confirm that files exist on disk where we expect to find
        # them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id)
            self.assertTrue(os.path.exists(path))

        # Migrate all the files into Swift.
        swift.to_swift(log, remove_func=os.unlink)

        # Confirm that all the files have gone from disk.
        for lfc in self.lfcs:
            self.assertFalse(
                os.path.exists(swift.filesystem_path(lfc.id)))

        # Confirm all the files are in Swift.
        swift_client = self.swift_fixture.connect()
        for lfc, contents in zip(self.lfcs, self.contents):
            container, name = swift.swift_location(lfc.id)
            headers, obj = swift_client.get_object(container, name)
            self.assertEqual(contents, obj, 'Did not round trip')

    def test_librarian_serves_from_swift(self):
        log = BufferLogger()

        # Move all the files into Swift and off the file system.
        swift.to_swift(log, remove_func=os.unlink)

        # Confirm we can still access the files from the Librarian.
        for lfa_id, content in zip(self.lfa_ids, self.contents):
            data = self.librarian_client.getFileByAlias(lfa_id).read()
            self.assertEqual(content, data)

    def test_librarian_serves_from_disk(self):
        # Ensure the Librarian falls back to serving files from disk
        # when they cannot be found in the Swift server. Note that
        # other Librarian tests do not have Swift active, so this
        # test is not redundant.
        for lfa_id, content in zip(self.lfa_ids, self.contents):
            data = self.librarian_client.getFileByAlias(lfa_id).read()
            self.assertEqual(content, data)

    def test_largish_binary_files_from_disk(self):
        # Generate a largish blob, including null bytes for kicks.
        # A largish file is large enough that the HTTP upload needs
        # to be done in multiple chunks, but small enough that it is
        # stored in Swift as a single object.
        size = 512 * 1024  # 512KB
        expected_content = ''.join(chr(i % 256) for i in range(0, size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)

        # Data round trips when served from disk.
        lfa = self.librarian_client.getFileByAlias(lfa_id)
        self.assertEqual(expected_content, lfa.read())

    def test_largish_binary_files_from_swift(self):
        # Generate a large blob, a multiple of the chunk size,
        # including null bytes for kicks.
        # A largish file is large enough that the HTTP upload needs
        # to be done in multiple chunks, but small enough that it is
        # stored in Swift as a single object.
        size = LibrarianStorage.CHUNK_SIZE * 50
        self.assertTrue(size > 1024 * 1024)
        expected_content = ''.join(chr(i % 256) for i in range(0, size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)
        lfc = IStore(LibraryFileAlias).get(
            LibraryFileAlias, lfa_id).content

        # This data size is a multiple of our chunk size.
        self.assertEqual(
            0, len(expected_content) % LibrarianStorage.CHUNK_SIZE)

        # Data round trips when served from Swift.
        swift.to_swift(BufferLogger(), remove_func=os.unlink)
        self.assertFalse(os.path.exists(swift.filesystem_path(lfc.id)))
        lfa = self.librarian_client.getFileByAlias(lfa_id)
        self.assertEqual(expected_content, lfa.read())

    def test_largish_binary_files_from_swift_offset(self):
        # Generate a large blob that is NOT a multiple of the chunk
        # size, including null bytes for kicks.
        # A largish file is large enough that the HTTP upload needs
        # to be done in multiple chunks, but small enough that it is
        # stored in Swift as a single object.
        size = LibrarianStorage.CHUNK_SIZE * 50 + 1
        self.assertTrue(size > 1024 * 1024)
        expected_content = ''.join(chr(i % 256) for i in range(0, size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)
        lfc = IStore(LibraryFileAlias).get(
            LibraryFileAlias, lfa_id).content

        # This data size is NOT a multiple of our chunk size.
        self.assertNotEqual(
            0, len(expected_content) % LibrarianStorage.CHUNK_SIZE)

        # Data round trips when served from Swift.
        swift.to_swift(BufferLogger(), remove_func=os.unlink)
        lfa = self.librarian_client.getFileByAlias(lfa_id)
        self.assertFalse(os.path.exists(swift.filesystem_path(lfc.id)))
        self.assertEqual(expected_content, lfa.read())

    def test_large_file_to_swift(self):
        # Generate a blob large enough that Swift requires us to
        # store it as multiple objects plus a manifest.
        size = LibrarianStorage.CHUNK_SIZE * 50
        self.assertTrue(size > 1024 * 1024)
        expected_content = ''.join(chr(i % 256) for i in range(0, size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)
        lfa = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
        lfc = lfa.content

        # We don't really want to upload a file >5GB to our mock
        # Swift, so change the constant instead. Set it so we need
        # 3 segments.
        def _reset_max(val):
            swift.MAX_SWIFT_OBJECT_SIZE = val
        self.addCleanup(_reset_max, swift.MAX_SWIFT_OBJECT_SIZE)
        swift.MAX_SWIFT_OBJECT_SIZE = int(size / 2) - 1

        # Shove the file requiring multiple segments into Swift.
        swift.to_swift(BufferLogger(), remove_func=None)

        # As our mock Swift does not support multi-segment files,
        # we examine it directly in Swift as best we can.
        swift_client = self.swift_fixture.connect()

        # The manifest exists. Unfortunately, we can't test that the
        # magic manifest header is set correctly.
        container, name = swift.swift_location(lfc.id)
        headers, obj = swift_client.get_object(container, name)
        self.assertEqual(obj, '')

        # The segments we expect are all in their expected locations.
        _, obj1 = swift_client.get_object(
            container, '{0}/0000'.format(name))
        _, obj2 = swift_client.get_object(
            container, '{0}/0001'.format(name))
        _, obj3 = swift_client.get_object(
            container, '{0}/0002'.format(name))
        self.assertRaises(
            swiftclient.ClientException, swift.quiet_swiftclient,
            swift_client.get_object, container, '{0}/0003'.format(name))

        # Our object round tripped.
        self.assertEqual(obj1 + obj2 + obj3, expected_content)
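# The segment layout asserted in test_large_file_to_swift (a zero-byte
# manifest under the object's name, segments at name/0000, name/0001,
# ...) can be restated as a small helper. This is an illustrative
# sketch of the naming scheme the test assumes, not Launchpad's upload
# code.
def segment_names(name, size, max_object_size):
    # One segment per max_object_size bytes, rounded up, preceded by
    # the manifest stored under the plain object name.
    num_segments = (size + max_object_size - 1) // max_object_size
    return [name] + [
        '{0}/{1:04d}'.format(name, i) for i in range(num_segments)]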