def test_image_upload_fail(self):
    """Test that image upload failures are handled properly."""
    # Now swap out the upload fake so that the next uploads fail
    fake_client = fakeprovider.FakeUploadFailCloud(times_to_fail=1)

    def get_fake_client(*args, **kwargs):
        return fake_client

    self.useFixture(fixtures.MockPatchObject(
        Drivers.get('fake')['provider'], '_getClient',
        get_fake_client))

    configfile = self.setup_config('node.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    # NOTE(pabelanger): Disable CleanupWorker thread for nodepool-builder
    # as we currently race it to validate our failed uploads.
    self.useBuilder(configfile, cleanup_interval=0)
    pool.start()

    self.waitForImage('fake-provider', 'fake-image')
    nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(nodes), 1)

    newest_builds = self.zk.getMostRecentBuilds(1, 'fake-image',
                                                state=zk.READY)
    self.assertEqual(1, len(newest_builds))

    uploads = self.zk.getUploads('fake-image', newest_builds[0].id,
                                 'fake-provider', states=[zk.FAILED])
    self.assertEqual(1, len(uploads))
def get_provider_config(provider):
    provider.setdefault('driver', 'openstack')
    # Ensure legacy configuration still works when using fake cloud
    if provider.get('name', '').startswith('fake'):
        provider['driver'] = 'fake'
    driver = Drivers.get(provider['driver'])
    return driver['config'](provider)
def get_provider_config(provider):
    provider.setdefault('driver', 'openstack')
    # Ensure legacy configuration still works when using fake cloud
    if provider.get('name', '').startswith('fake'):
        provider['driver'] = 'fake'
    driver = Drivers.get(provider['driver'])
    return driver.getProviderConfig(provider)
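# The two get_provider_config() variants above differ only in how the driver
# entry is consumed: dict-style lookup (driver['config'](provider)) versus an
# interface method (driver.getProviderConfig(provider)).  The sketch below is
# a hypothetical registry entry, not real nodepool code, showing how a single
# entry could satisfy both access styles; FakeDriverEntry and its factory
# arguments are illustrative assumptions.
class FakeDriverEntry(dict):
    """Hypothetical Drivers entry usable via both access styles."""

    def __init__(self, config_factory, provider_factory, handler_factory):
        # Expose the factories under the keys used by the dict-style callers.
        super().__init__(config=config_factory,
                         provider=provider_factory,
                         handler=handler_factory)

    def getProviderConfig(self, provider):
        # Interface-style callers delegate to the same config factory.
        return self['config'](provider)

    def getProvider(self, provider):
        return self['provider'](provider)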
def test_node_delete_failure(self):
    def fail_delete(self, name):
        raise RuntimeError('Fake Error')

    self.useFixture(fixtures.MockPatchObject(
        Drivers.get('fake')['provider'], 'deleteServer', fail_delete))

    configfile = self.setup_config('node.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    self.useBuilder(configfile)
    pool.start()
    self.waitForImage('fake-provider', 'fake-image')
    nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(nodes), 1)

    self.zk.lockNode(nodes[0], blocking=False)
    nodepool.launcher.NodeDeleter.delete(
        self.zk, pool.getProviderManager('fake-provider'), nodes[0])

    # Make sure our old node is in delete state, even though delete failed
    deleted_node = self.zk.getNode(nodes[0].id)
    self.assertIsNotNone(deleted_node)
    self.assertEqual(deleted_node.state, zk.DELETING)

    # Make sure we have a new, READY node
    nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(nodes), 1)
    self.assertEqual(nodes[0].provider, 'fake-provider')
def _get_node_request_handler(self, provider, request):
    driver = Drivers.get(provider.driver.name)
    return driver['handler'](self, request)
def get_provider(provider):
    driver = Drivers.get(provider.driver.name)
    return driver.getProvider(provider)
def test_external_driver_config(self):
    configfile = self.setup_config('external_driver.yaml')
    nodepool_config.loadConfig(configfile)
    self.assertIn("config", Drivers.get("test"))
def get_provider(provider, use_taskmanager):
    driver = Drivers.get(provider.driver.name)
    return driver['provider'](provider, use_taskmanager)
def test_over_quota(self, config='node_quota_cloud.yaml'):
    '''
    This tests what happens when a cloud unexpectedly returns an
    over-quota error.
    '''
    # Start with an instance quota of 2
    max_cores = math.inf
    max_instances = 2
    max_ram = math.inf

    # patch the cloud with requested quota
    def fake_get_quota():
        return (max_cores, max_instances, max_ram)

    self.useFixture(fixtures.MockPatchObject(
        Drivers.get('fake')['provider'].fake_cloud, '_get_quota',
        fake_get_quota))

    configfile = self.setup_config(config)
    self.useBuilder(configfile)
    self.waitForImage('fake-provider', 'fake-image')

    nodepool.launcher.LOCK_CLEANUP = 1
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()
    self.wait_for_config(pool)

    client = pool.getProviderManager('fake-provider')._getClient()

    # Wait for a single node to be created
    req1 = zk.NodeRequest()
    req1.state = zk.REQUESTED
    req1.node_types.append('fake-label')
    self.log.debug("Adding first request")
    self.zk.storeNodeRequest(req1)
    req1 = self.waitForNodeRequest(req1)
    self.assertEqual(req1.state, zk.FULFILLED)

    # Lock this node so it appears as used and not deleted
    req1_node = self.zk.getNode(req1.nodes[0])
    self.zk.lockNode(req1_node, blocking=False)

    # Now, reduce the quota so the next node unexpectedly
    # (according to nodepool's quota estimate) fails.
    client.max_instances = 1

    # Request a second node; this request should fail.
    req2 = zk.NodeRequest()
    req2.state = zk.REQUESTED
    req2.node_types.append('fake-label')
    self.log.debug("Adding second request")
    self.zk.storeNodeRequest(req2)
    req2 = self.waitForNodeRequest(req2)
    self.assertEqual(req2.state, zk.FAILED)

    # After the second request failed, the internal quota estimate
    # should be reset, so the next request should pause to wait
    # for more quota to become available.
    req3 = zk.NodeRequest()
    req3.state = zk.REQUESTED
    req3.node_types.append('fake-label')
    self.log.debug("Adding third request")
    self.zk.storeNodeRequest(req3)
    req3 = self.waitForNodeRequest(req3, (zk.PENDING,))
    self.assertEqual(req3.state, zk.PENDING)

    # Wait until there is a paused request handler and verify that
    # there is still only one server built (from the first request).
    pool_worker = pool.getPoolWorkers('fake-provider')
    while not pool_worker[0].paused_handler:
        time.sleep(0.1)
    self.assertEqual(len(client._server_list), 1)
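# Illustrative sketch (an assumption, not nodepool's actual quota code) of the
# headroom check that test_over_quota exercises: a launch should only proceed
# while estimated usage plus the new node's footprint fits inside the
# (cores, instances, ram) tuple that fake_get_quota() returns.
import math


def has_quota_headroom(quota, used, needed):
    """Return True if `needed` fits under `quota` given current `used`.

    All arguments are (cores, instances, ram) tuples; math.inf marks an
    unlimited dimension, mirroring the fake_get_quota() helper above.
    """
    return all(u + n <= q for q, u, n in zip(quota, used, needed))


# With an instance quota of 2 and one server already running, one more node
# fits; a second extra node would not (the flavor numbers are made up).
assert has_quota_headroom((math.inf, 2, math.inf), (4, 1, 8192), (4, 1, 8192))
assert not has_quota_headroom((math.inf, 2, math.inf), (8, 2, 16384),
                              (4, 1, 8192))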
def _test_node_assignment_at_quota(self,
                                   config,
                                   max_cores=100,
                                   max_instances=20,
                                   max_ram=1000000):
    '''
    Successful node launch should have unlocked nodes in READY state
    and assigned to the request.  This should be run with a quota that
    fits for two nodes.
    '''

    # patch the cloud with requested quota
    def fake_get_quota():
        return (max_cores, max_instances, max_ram)

    self.useFixture(fixtures.MockPatchObject(
        Drivers.get('fake')['provider'].fake_cloud, '_get_quota',
        fake_get_quota))

    configfile = self.setup_config(config)
    self.useBuilder(configfile)
    self.waitForImage('fake-provider', 'fake-image')

    nodepool.launcher.LOCK_CLEANUP = 1
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()
    self.wait_for_config(pool)

    client = pool.getProviderManager('fake-provider')._getClient()

    req1 = zk.NodeRequest()
    req1.state = zk.REQUESTED
    req1.node_types.append('fake-label')
    req1.node_types.append('fake-label')
    self.zk.storeNodeRequest(req1)

    self.log.debug("Waiting for 1st request %s", req1.id)
    req1 = self.waitForNodeRequest(req1, (zk.FULFILLED,))
    self.assertEqual(len(req1.nodes), 2)

    # Mark the first request's nodes as in use so they won't be deleted
    # when we pause.  Locking them is enough.
    req1_node1 = self.zk.getNode(req1.nodes[0])
    req1_node2 = self.zk.getNode(req1.nodes[1])
    self.zk.lockNode(req1_node1, blocking=False)
    self.zk.lockNode(req1_node2, blocking=False)

    # One of the things we want to test is that if we spawn many
    # node launches at once, we do not deadlock while the request
    # handler pauses for quota.  To ensure we test that case,
    # pause server creation until we have accepted all of the node
    # requests we submit.  This will ensure that we hold locks on
    # all of the nodes before pausing so that we can validate they
    # are released.
    req2 = zk.NodeRequest()
    req2.state = zk.REQUESTED
    req2.node_types.append('fake-label')
    req2.node_types.append('fake-label')
    self.zk.storeNodeRequest(req2)

    req2 = self.waitForNodeRequest(req2, (zk.PENDING,))

    # At this point, we should have already created two servers for the
    # first request, and the request handler has accepted the second node
    # request but paused waiting for the server count to go below quota.
    # Wait until there is a paused request handler and check if there
    # are exactly two servers.
    pool_worker = pool.getPoolWorkers('fake-provider')
    while not pool_worker[0].paused_handler:
        time.sleep(0.1)
    self.assertEqual(len(client._server_list), 2)

    # Mark the first request's nodes as USED, which will get them deleted
    # and allow the second to proceed.
    self.log.debug("Marking first node as used %s", req1.id)
    req1_node1.state = zk.USED
    self.zk.storeNode(req1_node1)
    self.zk.unlockNode(req1_node1)
    self.waitForNodeDeletion(req1_node1)

    # To force the sequential nature of what we're testing, wait for
    # the 2nd request to get a node allocated to it now that we've
    # freed up a node.
    self.log.debug("Waiting for node allocation for 2nd request")
    done = False
    while not done:
        for n in self.zk.nodeIterator():
            if n.allocated_to == req2.id:
                done = True
                break

    self.log.debug("Marking second node as used %s", req1.id)
    req1_node2.state = zk.USED
    self.zk.storeNode(req1_node2)
    self.zk.unlockNode(req1_node2)
    self.waitForNodeDeletion(req1_node2)

    self.log.debug("Deleting 1st request %s", req1.id)
    self.zk.deleteNodeRequest(req1)
    self.waitForNodeRequestLockDeletion(req1.id)

    req2 = self.waitForNodeRequest(req2, (zk.FULFILLED,))
    self.assertEqual(len(req2.nodes), 2)