Ejemplo n.º 1
0
class ProvisionerServiceNoContextualizationTest(BaseProvisionerServiceTests):
    """Provisioner service tests that run with no context broker configured."""

    def setUp(self):
        """Wire up fakes and start the provisioner without a context client."""
        self.notifier = FakeProvisionerNotifier()
        self.context_client = None

        self.store = self.setup_store()
        self.driver = FakeNodeDriver()
        self.driver.initialize()

        self.spawn_procs()
        self.load_dtrs()

    def test_launch_no_context(self):
        """Provision ten nodes and walk them from PENDING to RUNNING."""
        all_node_ids = []

        for _ in range(10):
            nid = _new_id()
            all_node_ids.append(nid)
            self.client.provision(_new_id(), [nid], "empty",
                site="fake-site1", caller="asterix")

        self.notifier.wait_for_state(InstanceState.PENDING, all_node_ids,
            before=self.provisioner.leader._force_cycle)
        self.assertStoreNodeRecords(InstanceState.PENDING, *all_node_ids)

        # Simulate the IaaS reporting each instance as running.
        for nid in all_node_ids:
            record = self.store.get_node(nid)
            self.driver.set_node_running(record['iaas_id'])

        self.notifier.wait_for_state(InstanceState.RUNNING, all_node_ids,
            before=self.provisioner.leader._force_cycle)
        self.assertStoreNodeRecords(InstanceState.RUNNING, *all_node_ids)
Ejemplo n.º 2
0
    def setUp(self):
        """Start the provisioner service backed by a fake context broker."""
        self.notifier = FakeProvisionerNotifier()
        self.context_client = FakeContextClient()

        self.store = yield self.setup_store()
        self.site_drivers = {'fake-site1': FakeNodeDriver()}

        yield self._start_container()
        yield self.spawn_procs()

        # Look up the provisioner process id so the client can reach it.
        provisioner_pid = yield self.procRegistry.get("provisioner")
        self.client = ProvisionerClient(pid=provisioner_pid)
Ejemplo n.º 3
0
    def setUp(self):
        """Assemble a two-site ProvisionerCore wired entirely to fakes."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()

        site_drivers = {
            'site1': self.site1_driver,
            'site2': self.site2_driver,
        }
        self.core = ProvisionerCore(store=self.store, notifier=self.notifier,
                                    dtrs=self.dtrs, context=self.ctx,
                                    site_drivers=site_drivers)
Ejemplo n.º 4
0
    def setUp(self):
        """Build a ProvisionerCore with fake DTRS site and credential data."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        # Caller "asterix" only sees site1; the default (None) caller sees both.
        self.dtrs.sites = {
            "asterix": {"site1": {"type": "fake"}},
            None: {"site1": {"type": "fake"},
                   "site2": {"type": "fake"}},
        }

        # Both sites deliberately share one credentials record, as in the
        # original chained assignment.
        creds = {"access_key": "mykey", "secret_key": "mysecret"}
        self.dtrs.credentials['site'][("asterix", "site1")] = creds
        self.dtrs.credentials['site'][("asterix", "site2")] = creds

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()
        self.site1_driver.initialize()
        self.site2_driver.initialize()

        self.core = ProvisionerCore(store=self.store, notifier=self.notifier,
                                    dtrs=self.dtrs, context=self.ctx)
Ejemplo n.º 5
0
    def setUp(self):
        """Start the provisioner pieces with no context client configured."""

        self.notifier = FakeProvisionerNotifier()
        # No contextualization in this suite.
        self.context_client = None

        self.store = self.setup_store()
        self.driver = FakeNodeDriver()
        self.driver.initialize()

        self.spawn_procs()
        self.load_dtrs()
Ejemplo n.º 6
0
    def setUp(self):
        """Create a ProvisionerCore over two fake site drivers."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()

        self.core = ProvisionerCore(
            store=self.store,
            notifier=self.notifier,
            dtrs=self.dtrs,
            context=self.ctx,
            site_drivers={'site1': self.site1_driver,
                          'site2': self.site2_driver})
Ejemplo n.º 7
0
    def setUp(self):
        """Bring up the provisioner service with a fake context client."""
        self.notifier = FakeProvisionerNotifier()
        self.context_client = FakeContextClient()

        self.store = yield self.setup_store()
        self.site_drivers = {'fake-site1': FakeNodeDriver()}

        yield self._start_container()
        yield self.spawn_procs()

        # Resolve the running provisioner's pid for the client connection.
        provisioner_pid = yield self.procRegistry.get("provisioner")
        self.client = ProvisionerClient(pid=provisioner_pid)
Ejemplo n.º 8
0
    def setUp(self):
        """Start the service and swap its core for a terminate-all fake."""
        self.notifier = FakeProvisionerNotifier()
        self.context_client = FakeContextClient()

        self.store = ProvisionerStore()
        self.site_drivers = {'fake-site1': FakeNodeDriver()}

        yield self._start_container()
        yield self.spawn_procs()

        # Replace the real core so terminate-all calls can be observed.
        self.fakecore = TerminateAllFakeCore()
        self.patch(self.provisioner, "core", self.fakecore)

        provisioner_pid = yield self.procRegistry.get("provisioner")
        self.client = ProvisionerClient(pid=provisioner_pid)
Ejemplo n.º 9
0
    def setUp(self):
        """Set up a live Nimbus integration test (currently always skipped)."""

        # @itv decorator is gone. This test could probably go away entirely but I've
        # found it personally useful. Unconditionally skipping for now, til we know
        # what to do with it.
        raise unittest.SkipTest("developer-only Nimbus integration test")

        # NOTE: everything below is intentionally unreachable; it is kept so a
        # developer can comment out the SkipTest above and run against Nimbus.

        # skip this test if IaaS credentials are unavailable
        maybe_skip_test()

        self.notifier = FakeProvisionerNotifier()
        self.context_client = get_context_client()

        self.store = yield self.setup_store()
        self.site_drivers = provisioner.get_site_drivers(
            get_nimbus_test_sites())

        yield self._start_container()
        yield self.spawn_procs()

        pId = yield self.procRegistry.get("provisioner")
        self.client = ProvisionerClient(pid=pId)
Ejemplo n.º 10
0
class ProvisionerCoreTests(unittest.TestCase):
    """Testing the provisioner core functionality
    """
    def setUp(self):
        """Build a ProvisionerCore wired to fake store/context/DTRS/drivers."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()

        drivers = {'site1': self.site1_driver, 'site2': self.site2_driver}
        self.core = ProvisionerCore(store=self.store,
                                    notifier=self.notifier,
                                    dtrs=self.dtrs,
                                    context=self.ctx,
                                    site_drivers=drivers)

    @defer.inlineCallbacks
    def test_prepare_dtrs_error(self):
        """A DTRS lookup error during prepare should mark nodes FAILED."""
        self.dtrs.error = DeployableTypeLookupError()

        nodes = {
            "i1": dict(ids=[_new_id()], site="chicago", allocation="small")
        }
        request = dict(launch_id=_new_id(),
                       deployable_type="foo",
                       subscribers=('blah', ),
                       nodes=nodes)
        yield self.core.prepare_provision(request)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_prepare_broker_error(self):
        """A context-broker create failure during prepare should FAIL nodes."""
        self.ctx.create_error = BrokerError("fake ctx create failed")
        self.dtrs.result = {
            'document': "<fake>document</fake>",
            "nodes": {
                "i1": {}
            }
        }
        nodes = {"i1": dict(ids=[_new_id()], site="site1", allocation="small")}
        request = dict(launch_id=_new_id(),
                       deployable_type="foo",
                       subscribers=('blah', ),
                       nodes=nodes)
        yield self.core.prepare_provision(request)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_prepare_execute(self):
        """The happy path: prepare + execute leaves nodes PENDING."""
        yield self._prepare_execute()
        self.assertTrue(self.notifier.assure_state(states.PENDING))

    @defer.inlineCallbacks
    def test_prepare_execute_iaas_fail(self):
        """An IaaS credential error during execute should FAIL nodes."""
        self.site1_driver.create_node_error = InvalidCredsError()
        yield self._prepare_execute()
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def _prepare_execute(self):
        """Helper: run prepare_provision + execute_provision for one node.

        Asserts the intermediate REQUESTED state and the launch/context
        records produced by prepare before executing.
        """
        self.dtrs.result = {
            'document': _get_one_node_cluster_doc("node1", "image1"),
            "nodes": {
                "node1": {}
            }
        }
        request_node = dict(ids=[_new_id()], site="site1", allocation="small")
        request_nodes = {"node1": request_node}
        request = dict(launch_id=_new_id(),
                       deployable_type="foo",
                       subscribers=('blah', ),
                       nodes=request_nodes)

        launch, nodes = yield self.core.prepare_provision(request)

        self.assertEqual(len(nodes), 1)
        node = nodes[0]
        self.assertEqual(node['node_id'], request_node['ids'][0])
        self.assertEqual(launch['launch_id'], request['launch_id'])

        self.assertTrue(self.ctx.last_create)
        self.assertEqual(launch['context'], self.ctx.last_create)
        for key in ('uri', 'secret', 'context_id', 'broker_uri'):
            self.assertIn(key, launch['context'])
        self.assertTrue(self.notifier.assure_state(states.REQUESTED))

        yield self.core.execute_provision(launch, nodes)

    @defer.inlineCallbacks
    def test_execute_bad_doc(self):
        """An unparseable cluster document should FAIL the launch."""
        ctx = yield self.ctx.create()
        launch_record = {
            'launch_id': "thelaunchid",
            'document': "<this><isnt><a><real><doc>",
            'deployable_type': "dt",
            'context': ctx,
            'subscribers': [],
            'state': states.PENDING,
            'node_ids': ['node1']
        }
        nodes = [{
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED
        }]

        yield self.core.execute_provision(launch_record, nodes)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

        # TODO this should be a better error coming from nimboss
        #self.assertEqual(self.notifier.nodes['node1']['state_desc'], "CONTEXT_DOC_INVALID")

    @defer.inlineCallbacks
    def test_execute_bad_doc_nodes(self):
        """A node whose ctx_name is absent from the document should FAIL."""
        ctx = yield self.ctx.create()
        launch_record = {
            'launch_id': "thelaunchid",
            'document': _get_one_node_cluster_doc("node1", "image1"),
            'deployable_type': "dt",
            'context': ctx,
            'subscribers': [],
            'state': states.PENDING,
            'node_ids': ['node1']
        }
        nodes = [{
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED,
            'ctx_name': "adifferentname"
        }]

        yield self.core.execute_provision(launch_record, nodes)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_execute_bad_doc_node_count(self):
        """More nodes than the document declares should FAIL the launch."""
        ctx = yield self.ctx.create()
        launch_record = {
            'launch_id': "thelaunchid",
            'document': _get_one_node_cluster_doc("node1", "image1"),
            'deployable_type': "dt",
            'context': ctx,
            'subscribers': [],
            'state': states.PENDING,
            'node_ids': ['node1']
        }

        # two nodes where doc expects 1
        nodes = [{
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED,
            'ctx_name': "node1"
        }, {
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED,
            'ctx_name': "node1"
        }]

        yield self.core.execute_provision(launch_record, nodes)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_query_missing_node_within_window(self):
        """A PENDING node missing from IaaS within the grace window: no-op."""
        launch_id = _new_id()
        node_id = _new_id()
        # pending_timestamp 30s ago — presumably inside the grace window;
        # the node must not be failed yet.
        ts = time.time() - 30.0
        launch = {
            'launch_id': launch_id,
            'node_ids': [node_id],
            'state': states.PENDING,
            'subscribers': 'fake-subscribers'
        }
        node = {
            'launch_id': launch_id,
            'node_id': node_id,
            'state': states.PENDING,
            'pending_timestamp': ts
        }
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)

        yield self.core.query_one_site('fake-site', [node],
                                       driver=FakeEmptyNodeQueryDriver())
        self.assertEqual(len(self.notifier.nodes), 0)

    @defer.inlineCallbacks
    def test_query_missing_node_past_window(self):
        """A PENDING node missing from IaaS past the window must be FAILED."""
        launch_id = _new_id()
        node_id = _new_id()

        # pending_timestamp 120s ago — past the grace window.
        ts = time.time() - 120.0
        launch = {
            'launch_id': launch_id,
            'node_ids': [node_id],
            'state': states.PENDING,
            'subscribers': 'fake-subscribers'
        }
        node = {
            'launch_id': launch_id,
            'node_id': node_id,
            'state': states.PENDING,
            'pending_timestamp': ts
        }
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)

        yield self.core.query_one_site('fake-site', [node],
                                       driver=FakeEmptyNodeQueryDriver())
        self.assertEqual(len(self.notifier.nodes), 1)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_query(self):
        """Full site-query cycle: PENDING -> STARTED -> TERMINATED."""
        launch_id = _new_id()
        node_id = _new_id()

        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        ts = time.time() - 120.0
        launch = {
            'launch_id': launch_id,
            'node_ids': [node_id],
            'state': states.PENDING,
            'subscribers': 'fake-subscribers'
        }
        node = {
            'launch_id': launch_id,
            'node_id': node_id,
            'state': states.PENDING,
            'pending_timestamp': ts,
            'iaas_id': iaas_node.id,
            'site': 'site1'
        }

        # A REQUESTED node should pass through the query untouched.
        req_node = {
            'launch_id': launch_id,
            'node_id': _new_id(),
            'state': states.REQUESTED
        }
        nodes = [node, req_node]
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)
        yield self.store.put_node(req_node)

        yield self.core.query_one_site('site1', nodes)

        node = yield self.store.get_node(node_id)
        self.assertEqual(node['public_ip'], iaas_node.public_ip)
        self.assertEqual(node['private_ip'], iaas_node.private_ip)
        self.assertEqual(node['state'], states.STARTED)

        # query again should detect no changes
        yield self.core.query_one_site('site1', nodes)

        # now destroy
        yield self.core.terminate_nodes([node_id])
        node = yield self.store.get_node(node_id)
        yield self.core.query_one_site('site1', [node])

        node = yield self.store.get_node(node_id)
        self.assertEqual(node['public_ip'], iaas_node.public_ip)
        self.assertEqual(node['private_ip'], iaas_node.private_ip)
        self.assertEqual(node['state'], states.TERMINATED)

    @defer.inlineCallbacks
    def test_query_ctx(self):
        """Context query moves nodes to RUNNING as ctx nodes report in."""
        node_count = 3
        launch_id = _new_id()
        node_records = [
            make_node(launch_id, states.STARTED) for i in range(node_count)
        ]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # first query with no ctx nodes. zero records should be updated
        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_record_count(0))

        # all but 1 node have reported ok
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(node_records[i]['public_ip'], _new_id(),
                                  _new_id()) for i in range(node_count - 1)
        ]

        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertEqual(len(self.notifier.nodes), node_count - 1)

        # last node reports ok
        self.ctx.nodes.append(
            _one_fake_ctx_node_ok(node_records[-1]['public_ip'], _new_id(),
                                  _new_id()))

        self.ctx.complete = True
        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertTrue(self.notifier.assure_record_count(1))

    @defer.inlineCallbacks
    def test_query_ctx_error(self):
        """A ctx node reporting an error ends up RUNNING_FAILED; rest RUNNING."""
        node_count = 3
        launch_id = _new_id()
        node_records = [
            make_node(launch_id, states.STARTED) for i in range(node_count)
        ]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # all but 1 node have reported ok
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(node_records[i]['public_ip'], _new_id(),
                                  _new_id()) for i in range(node_count - 1)
        ]
        self.ctx.nodes.append(
            _one_fake_ctx_node_error(node_records[-1]['public_ip'], _new_id(),
                                     _new_id()))

        ok_ids = [node_records[i]['node_id'] for i in range(node_count - 1)]
        error_ids = [node_records[-1]['node_id']]

        self.ctx.complete = True
        self.ctx.error = True

        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING, ok_ids))
        self.assertTrue(
            self.notifier.assure_state(states.RUNNING_FAILED, error_ids))

    @defer.inlineCallbacks
    def test_query_ctx_nodes_not_started(self):
        """No context query should happen while any node is still PENDING."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.PENDING) for i in range(3)]
        node_records.append(make_node(launch_id, states.STARTED))
        launch_record = make_launch(launch_id, states.PENDING, node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        yield self.core.query_contexts()

        # ensure that no context was actually queried. See the note in
        # _query_one_context for the reason why this is important.
        self.assertEqual(len(self.ctx.queried_uris), 0)

    @defer.inlineCallbacks
    def test_query_ctx_permanent_broker_error(self):
        """ContextNotFoundError fails the nodes and the whole launch."""
        node_count = 3
        launch_id = _new_id()
        node_records = [
            make_node(launch_id, states.STARTED) for i in range(node_count)
        ]
        node_ids = [node['node_id'] for node in node_records]
        launch_record = make_launch(launch_id, states.PENDING, node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.query_error = ContextNotFoundError()
        yield self.core.query_contexts()

        self.assertTrue(
            self.notifier.assure_state(states.RUNNING_FAILED, node_ids))
        launch = yield self.store.get_launch(launch_id)
        self.assertEqual(launch['state'], states.FAILED)

    def test_update_node_ip_info(self):
        """update_node_ip_info copies IPs but never clobbers them with empties."""
        node = dict(public_ip=None)
        iaas_node = Mock(public_ip=None, private_ip=None)
        update_node_ip_info(node, iaas_node)
        self.assertEqual(node['public_ip'], None)
        self.assertEqual(node['private_ip'], None)

        iaas_node = Mock(public_ip=["pub1"], private_ip=["priv1"])
        update_node_ip_info(node, iaas_node)
        self.assertEqual(node['public_ip'], "pub1")
        self.assertEqual(node['private_ip'], "priv1")

        # empty lists from IaaS must not erase previously-recorded IPs
        iaas_node = Mock(public_ip=[], private_ip=[])
        update_node_ip_info(node, iaas_node)
        self.assertEqual(node['public_ip'], "pub1")
        self.assertEqual(node['private_ip'], "priv1")

    def test_update_nodes_from_ctx(self):
        """Every node record is matched and updated from its ctx node."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED) for i in range(5)]
        ctx_nodes = [
            _one_fake_ctx_node_ok(node['public_ip'], _new_id(), _new_id())
            for node in nodes
        ]

        self.assertEquals(len(nodes),
                          len(update_nodes_from_context(nodes, ctx_nodes)))

    def test_update_nodes_from_ctx_with_hostname(self):
        """Matching also works when the ctx node carries a hostname."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED) for i in range(5)]
        # libcloud puts the hostname in the public_ip field
        ctx_nodes = [
            _one_fake_ctx_node_ok(ip=_new_id(),
                                  hostname=node['public_ip'],
                                  pubkey=_new_id()) for node in nodes
        ]

        self.assertEquals(len(nodes),
                          len(update_nodes_from_context(nodes, ctx_nodes)))

    @defer.inlineCallbacks
    def test_query_broker_exception(self):
        """One broker error must not prevent querying other contexts."""
        for i in range(2):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING,
                                        node_records)

            yield self.store.put_launch(launch_record)
            yield self.store.put_nodes(node_records)

        # no guaranteed order here so grabbing first launch from store
        # and making that one return a BrokerError during context query.
        # The goal is to ensure that one error doesn't prevent querying
        # for other contexts.

        launches = yield self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]
        error_launch_ctx = error_launch['context']['uri']
        ok_node_id = launches[1]['node_ids'][0]
        ok_node = yield self.store.get_node(ok_node_id)

        self.ctx.uri_query_error[error_launch_ctx] = BrokerError("bad broker")
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(ok_node['public_ip'], _new_id(), _new_id())
        ]
        self.ctx.complete = True
        yield self.core.query_contexts()

        launches = yield self.store.get_launches()
        for launch in launches:
            self.assertIn(launch['context']['uri'], self.ctx.queried_uris)

            if launch['launch_id'] == error_launch['launch_id']:
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED
            else:
                self.assertEqual(launch['state'], states.RUNNING)
                expected_node_state = states.RUNNING

            node = yield self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    @defer.inlineCallbacks
    def test_query_ctx_without_valid_nodes(self):
        """A launch with only TERMINATING nodes is failed without a query."""

        # if there are no nodes < TERMINATING, no broker query should happen
        for i in range(3):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING,
                                        node_records)

            yield self.store.put_launch(launch_record)
            yield self.store.put_nodes(node_records)

        launches = yield self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]

        # mark first launch's node as TERMINATING, should prevent
        # context query and result in launch being marked FAILED
        error_launch_node = yield self.store.get_node(
            error_launch['node_ids'][0])
        error_launch_node['state'] = states.TERMINATING
        yield self.store.put_node(error_launch_node)

        yield self.core.query_contexts()
        self.assertNotIn(error_launch['context']['uri'], self.ctx.queried_uris)

        launches = yield self.store.get_launches()
        for launch in launches:
            if launch['launch_id'] == error_launch['launch_id']:
                self.assertEqual(launch['state'], states.FAILED)
                expected_node_state = states.TERMINATING
            else:
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED

            node = yield self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    @defer.inlineCallbacks
    def test_query_unexpected_exception(self):
        """Unexpected exceptions inside query() must not propagate out."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)]
        launch_record = make_launch(launch_id, states.PENDING, node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)
        self.ctx.query_error = ValueError("bad programmer")

        # digging into internals a bit: patching one of the methods query()
        # calls to raise an exception. This will let us ensure exceptions do
        # not bubble up
        def raiser(self):
            raise KeyError("notreallyaproblem")

        self.patch(self.core, 'query_nodes', raiser)

        yield self.core.query()  # ensure that exception doesn't bubble up

    @defer.inlineCallbacks
    def test_dump_state(self):
        """dump_state notifies exactly the requested node records, once each."""
        node_ids = []
        node_records = []
        for i in range(3):
            launch_id = _new_id()
            nodes = [make_node(launch_id, states.PENDING)]
            node_ids.append(nodes[0]['node_id'])
            node_records.extend(nodes)
            launch = make_launch(launch_id, states.PENDING, nodes)
            yield self.store.put_launch(launch)
            yield self.store.put_nodes(nodes)

        yield self.core.dump_state(node_ids[:2])

        # should have gotten notifications about the 2 nodes
        self.assertEqual(self.notifier.nodes_rec_count[node_ids[0]], 1)
        self.assertEqual(node_records[0], self.notifier.nodes[node_ids[0]])
        self.assertEqual(node_records[1], self.notifier.nodes[node_ids[1]])
        self.assertEqual(self.notifier.nodes_rec_count[node_ids[1]], 1)
        self.assertNotIn(node_ids[2], self.notifier.nodes)

    @defer.inlineCallbacks
    def test_mark_nodes_terminating(self):
        """mark_nodes_terminating updates only the requested nodes."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.RUNNING) for i in range(3)]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        first_two_node_ids = [
            node_records[0]['node_id'], node_records[1]['node_id']
        ]
        yield self.core.mark_nodes_terminating(first_two_node_ids)

        self.assertTrue(
            self.notifier.assure_state(states.TERMINATING,
                                       nodes=first_two_node_ids))
        self.assertNotIn(node_records[2]['node_id'], self.notifier.nodes)

        for node_id in first_two_node_ids:
            terminating_node = yield self.store.get_node(node_id)
            self.assertEqual(terminating_node['state'], states.TERMINATING)
Ejemplo n.º 11
0
class ProvisionerCoreTests(unittest.TestCase):
    """Testing the provisioner core functionality
    """
    def setUp(self):
        """Build a ProvisionerCore wired entirely to in-memory fakes."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        # both sites share one fake site definition and one credential record
        fake_site = {"type": "fake"}
        creds = {"access_key": "mykey", "secret_key": "mysecret"}
        for site in ("site1", "site2"):
            self.dtrs.sites[site] = fake_site
            self.dtrs.credentials[("asterix", site)] = creds

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()
        for driver in (self.site1_driver, self.site2_driver):
            driver.initialize()

        self.core = ProvisionerCore(store=self.store, notifier=self.notifier,
                                    dtrs=self.dtrs, context=self.ctx)

    def test_terminate_all(self):
        """terminate_all should leave every node TERMINATING or TERMINATED."""
        caller = 'asterix'

        # one launch of three nodes in each of three lifecycle states
        for state in (states.RUNNING, states.PENDING, states.TERMINATED):
            launch, nodes = make_launch_and_nodes(
                _new_id(), 3, state, caller=caller)
            self.store.add_launch(launch)
            for node in nodes:
                self.store.add_node(node)

        self.core.terminate_all()

        all_nodes = self.store.get_nodes()
        self.assertEqual(9, len(all_nodes))
        end_states = (states.TERMINATING, states.TERMINATED)
        self.assertTrue(all(n['state'] in end_states for n in all_nodes))

    def test_prepare_dtrs_error(self):
        """A DTRS lookup failure should push the provision to FAILED."""
        self.dtrs.error = DeployableTypeLookupError()

        self.core.prepare_provision(
            launch_id=_new_id(),
            deployable_type="foo",
            instance_ids=[_new_id()],
            site="chicago")

        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_prepare_broker_error(self):
        """A context-broker create failure should push the provision to FAILED."""
        self.ctx.create_error = BrokerError("fake ctx create failed")
        self.dtrs.result = {
            'document': "<fake>document</fake>",
            "node": {}}

        self.core.prepare_provision(
            launch_id=_new_id(),
            deployable_type="foo",
            instance_ids=[_new_id()],
            site="chicago")

        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_prepare_execute(self):
        """Happy-path prepare + execute should leave the node PENDING."""
        self._prepare_execute()

        self.assertTrue(self.notifier.assure_state(states.PENDING))

    def test_prepare_execute_iaas_fail(self):
        """An IaaS credentials error during create_node should mark the launch FAILED.

        Fix: use ``side_effect`` rather than ``return_value`` so the mocked
        create_node actually *raises* InvalidCredsError instead of returning
        the exception instance — matching the other IaaS-failure tests in
        this class (test_launch_one_iaas_full, test_launch_one_iaas_timeout).
        """
        with patch('epu.provisioner.test.util.FakeNodeDriver.create_node') as mock_method:
            mock_method.side_effect = InvalidCredsError()
            self._prepare_execute()
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_prepare_execute_no_ctx(self):
        """With contextualization disabled the launch still reaches PENDING."""
        self.core.context = None

        # any accidental broker call would blow up loudly
        self.ctx.create_error = NotImplementedError()
        self.ctx.query_error = NotImplementedError()

        self._prepare_execute(context_enabled=False)

        self.assertTrue(self.notifier.assure_state(states.PENDING))

    def test_prepare_execute_existing_launch(self):
        """Re-preparing an already-known launch id should not break anything."""
        self.core.context = None
        launch_id = _new_id()
        instance_id = _new_id()

        # first call creates the launch; the repeat must tolerate it
        for assure in (True, False):
            self._prepare_execute(
                launch_id=launch_id, instance_ids=[instance_id],
                context_enabled=False, assure_state=assure)

        self.assertTrue(self.notifier.assure_state(states.PENDING))

    def _prepare_execute(self, launch_id=None, instance_ids=None,
                         context_enabled=True, assure_state=True):
        """Drive prepare_provision then execute_provision for one node.

        Sanity-checks the intermediate launch/node records along the way.
        ids default to fresh ones when not supplied by the caller.
        """
        self.dtrs.result = {
            'document': _get_one_node_cluster_doc("node1", "image1"),
            "node": {}}

        caller = "asterix"
        launch_id = launch_id or _new_id()
        instance_ids = instance_ids or [_new_id()]

        launch, nodes = self.core.prepare_provision(
            launch_id=launch_id,
            deployable_type="foo", instance_ids=instance_ids,
            site="site1", caller=caller)

        # one node record, wired to the requested ids
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0]['node_id'], instance_ids[0])
        self.assertEqual(launch['launch_id'], launch_id)
        self.assertEqual(launch['node_ids'], instance_ids)

        if context_enabled:
            # a context must have been created and recorded on the launch
            self.assertTrue(self.ctx.last_create)
            self.assertEqual(launch['context'], self.ctx.last_create)
            for key in ('uri', 'secret', 'context_id', 'broker_uri'):
                self.assertIn(key, launch['context'])
        else:
            self.assertEqual(launch['context'], None)

        if assure_state:
            self.assertTrue(self.notifier.assure_state(states.REQUESTED))

        self.core.execute_provision(launch, nodes, caller)

    def test_execute_bad_doc(self):
        """An unparseable context document must fail the launch."""
        caller = "asterix"
        ctx = self.ctx.create()
        launch_record = dict(
            launch_id="thelaunchid",
            document="<this><isnt><a><real><doc>",
            deployable_type="dt",
            context=ctx,
            state=states.PENDING,
            node_ids=['node1'])
        node = dict(node_id='node1', launch_id="thelaunchid",
                    state=states.REQUESTED, creator=caller)

        self.store.add_launch(launch_record)
        self.store.add_node(node)

        self.core.execute_provision(launch_record, [node], caller)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

        # TODO this should be a better error coming from nimboss
        # self.assertEqual(self.notifier.nodes['node1']['state_desc'], "CONTEXT_DOC_INVALID")

    def test_execute_bad_doc_nodes(self):
        """A node whose ctx_name is not in the document must fail the launch."""
        caller = 'asterix'
        ctx = self.ctx.create()
        launch_record = dict(
            launch_id="thelaunchid",
            document=_get_one_node_cluster_doc("node1", "image1"),
            deployable_type="dt",
            context=ctx,
            state=states.PENDING,
            node_ids=['node1'])
        # ctx_name does not match the "node1" entry in the document
        node = dict(node_id='node1', launch_id="thelaunchid",
                    state=states.REQUESTED, ctx_name="adifferentname",
                    creator=caller)

        self.store.add_launch(launch_record)
        self.store.add_node(node)

        self.core.execute_provision(launch_record, [node], caller)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_execute_bad_doc_node_count(self):
        """More node records than the document describes must fail the launch."""
        caller = "asterix"
        ctx = self.ctx.create()
        launch_record = dict(
            launch_id="thelaunchid",
            document=_get_one_node_cluster_doc("node1", "image1"),
            deployable_type="dt",
            context=ctx,
            state=states.PENDING,
            node_ids=['node1'])

        # two nodes where the document only expects one
        nodes = [
            dict(node_id='node1', launch_id="thelaunchid",
                 state=states.REQUESTED, ctx_name="node1", creator=caller),
            dict(node_id='node2', launch_id="thelaunchid",
                 state=states.REQUESTED, ctx_name="node1", creator=caller),
        ]

        self.store.add_launch(launch_record)
        for node in nodes:
            self.store.add_node(node)

        self.core.execute_provision(launch_record, nodes, caller)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_query_missing_node_within_window(self):
        """A PENDING node missing from IaaS inside the grace window is left alone."""
        caller = 'asterix'
        launch_id, node_id = _new_id(), _new_id()
        pending_since = time.time() - 30.0

        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.PENDING, pending_timestamp=pending_since,
                    creator=caller)
        self.store.add_launch(launch)
        self.store.add_node(node)

        # the IaaS reports no nodes at all
        with patch.object(FakeNodeDriver, 'list_nodes', return_value=[]):
            self.core.query_one_site('site1', [node], caller=caller)

        # still within the window: no notification should have gone out
        self.assertEqual(len(self.notifier.nodes), 0)

    def test_query_missing_started_node_within_window(self):
        """A STARTED node missing from IaaS is FAILED even inside the window."""
        caller = 'asterix'
        launch_id, node_id = _new_id(), _new_id()
        pending_since = time.time() - 30.0

        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.STARTED, pending_timestamp=pending_since,
                    creator=caller)
        self.store.add_launch(launch)
        self.store.add_node(node)

        # the IaaS has no record of the node
        with patch.object(FakeNodeDriver, 'list_nodes', return_value=[]):
            self.core.query_one_site('site1', [node], caller=caller)

        self.assertEqual(len(self.notifier.nodes), 1)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_query_missing_node_past_window(self):
        """A PENDING node missing from IaaS beyond the grace window is FAILED."""
        caller = 'asterix'
        launch_id, node_id = _new_id(), _new_id()
        pending_since = time.time() - 120.0

        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.PENDING, pending_timestamp=pending_since,
                    creator=caller)
        self.store.add_launch(launch)
        self.store.add_node(node)

        # the IaaS has no record of the node
        with patch.object(FakeNodeDriver, 'list_nodes', return_value=[]):
            self.core.query_one_site('site1', [node], caller=caller)

        self.assertEqual(len(self.notifier.nodes), 1)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_query_missing_node_terminating(self):
        """A TERMINATING node absent from IaaS is treated as TERMINATED."""
        caller = 'asterix'
        launch_id, node_id = _new_id(), _new_id()

        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.RUNNING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.TERMINATING, creator=caller)
        self.store.add_launch(launch)
        self.store.add_node(node)

        # the IaaS has no record of the node
        with patch.object(FakeNodeDriver, 'list_nodes', return_value=[]):
            self.core.query_one_site('site1', [node], caller=caller)

        self.assertEqual(len(self.notifier.nodes), 1)
        self.assertTrue(self.notifier.assure_state(states.TERMINATED))

    def test_query(self):
        """End-to-end query_one_site: PENDING -> STARTED, then terminate -> TERMINATED."""
        caller = "asterix"
        launch_id = _new_id()
        node_id = _new_id()

        # create a node in the fake IaaS and mark it running there
        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        ts = time.time() - 120.0
        launch = {
            'launch_id': launch_id, 'node_ids': [node_id],
            'state': states.PENDING,
            'creator': caller}
        node = {'launch_id': launch_id,
                'node_id': node_id,
                'state': states.PENDING,
                'pending_timestamp': ts,
                'iaas_id': iaas_node.id,
                'creator': caller,
                'site': 'site1'}

        # also include a node still in REQUESTED (no iaas_id)
        req_node = {'launch_id': launch_id,
                    'node_id': _new_id(),
                    'state': states.REQUESTED}
        nodes = [node, req_node]
        self.store.add_launch(launch)
        self.store.add_node(node)
        self.store.add_node(req_node)

        self.core.query_one_site('site1', nodes, caller=caller)

        # the running IaaS node is promoted to STARTED with its IPs filled in
        node = self.store.get_node(node_id)
        self.assertEqual(node.get('public_ip'), iaas_node.public_ip)
        self.assertEqual(node.get('private_ip'), iaas_node.private_ip)
        self.assertEqual(node.get('state'), states.STARTED)

        # query again should detect no changes
        self.core.query_one_site('site1', nodes, caller=caller)

        # now destroy
        self.core.terminate_nodes([node_id], remove_terminating=False)
        node = self.store.get_node(node_id)
        self.core.query_one_site('site1', [node], caller=caller)

        # after termination the record keeps its IPs but ends TERMINATED
        node = self.store.get_node(node_id)
        self.assertEqual(node['public_ip'], iaas_node.public_ip)
        self.assertEqual(node['private_ip'], iaas_node.private_ip)
        self.assertEqual(node['state'], states.TERMINATED)

    def test_terminate_requested_node(self):
        """Terminating a node still in REQUESTED goes straight to TERMINATED."""
        caller = "asterix"
        launch_id, node_id = _new_id(), _new_id()

        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        req_node = dict(launch_id=launch_id, node_id=node_id,
                        state=states.REQUESTED, site='site1')
        self.store.add_launch(launch)
        self.store.add_node(req_node)

        # destroy the not-yet-launched node
        self.core.terminate_nodes([node_id], remove_terminating=False)

        stored = self.store.get_node(node_id)
        self.assertEqual(stored['state'], states.TERMINATED)

    def test_query_no_contextualization(self):
        """With no context broker, a running IaaS node goes straight to RUNNING."""

        self.core.context = None

        launch_id = _new_id()
        node_id = _new_id()

        caller = 'asterix'

        # create a node in the fake IaaS and mark it running there
        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        ts = time.time() - 120.0
        launch = {
            'launch_id': launch_id, 'node_ids': [node_id],
            'state': states.PENDING,
            'creator': caller}
        node = {'launch_id': launch_id,
                'node_id': node_id,
                'state': states.PENDING,
                'pending_timestamp': ts,
                'iaas_id': iaas_node.id,
                'site': 'site1',
                'creator': caller}

        # also include a node still in REQUESTED (no iaas_id)
        req_node = {'launch_id': launch_id,
                    'node_id': _new_id(),
                    'state': states.REQUESTED}
        nodes = [node, req_node]
        self.store.add_launch(launch)
        self.store.add_node(node)
        self.store.add_node(req_node)

        self.core.query_one_site('site1', nodes, caller=caller)

        # IPs are copied from the IaaS record
        node = self.store.get_node(node_id)
        self.assertEqual(node.get('public_ip'), iaas_node.public_ip)
        self.assertEqual(node.get('private_ip'), iaas_node.private_ip)

        # since contextualization is disabled we should jump straight
        # to RUNNING
        self.assertEqual(node.get('state'), states.RUNNING)

    @raises(timeout)
    def test_query_iaas_timeout(self):
        """A slow IaaS list_nodes call must surface as a timeout."""
        caller = 'asterix'
        launch_id, node_id = _new_id(), _new_id()

        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        pending_since = time.time() - 120.0
        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        node = dict(name='hello', launch_id=launch_id, node_id=node_id,
                    state=states.PENDING, pending_timestamp=pending_since,
                    iaas_id=iaas_node.id, site='site1', creator=caller)
        req_node = dict(launch_id=launch_id, node_id=_new_id(),
                        state=states.REQUESTED)
        nodes = [node, req_node]
        self.store.add_launch(launch)
        self.store.add_node(node)
        self.store.add_node(req_node)

        def slow_list_nodes():
            raise timeout("Took too long to query iaas")

        self.core._IAAS_DEFAULT_TIMEOUT = 0.5
        with patch.object(FakeNodeDriver, 'list_nodes', side_effect=slow_list_nodes):
            self.core.query_one_site('site1', nodes, caller=caller)

    def test_launch_one_iaas_full(self):
        """An IaaS capacity error should FAIL the launch with IAAS_FULL."""
        def full_create_node(**kwargs):
            raise Exception("InstanceLimitExceeded: too many vms :(")

        with patch.object(FakeNodeDriver, 'create_node', side_effect=full_create_node):
            self.core._IAAS_DEFAULT_TIMEOUT = 0.5

            node_id, launch_id = _new_id(), _new_id()
            self._prepare_execute(launch_id=launch_id, instance_ids=[node_id])

            self.assertTrue(self.notifier.assure_state(states.FAILED))
            self.assertIn('IAAS_FULL', self.notifier.nodes[node_id]['state_desc'])
            self.assertEqual(
                self.store.get_launch(launch_id)['state'], states.FAILED)

    def test_launch_one_iaas_timeout(self):
        """A create_node timeout should FAIL the launch with IAAS_TIMEOUT."""
        def slow_create_node(**kwargs):
            raise timeout("Launch took too long")

        with patch.object(FakeNodeDriver, 'create_node', side_effect=slow_create_node):
            self.core._IAAS_DEFAULT_TIMEOUT = 0.5

            node_id, launch_id = _new_id(), _new_id()
            self._prepare_execute(launch_id=launch_id, instance_ids=[node_id])

            self.assertTrue(self.notifier.assure_state(states.FAILED))
            self.assertEqual(
                self.notifier.nodes[node_id]['state_desc'], 'IAAS_TIMEOUT')
            self.assertEqual(
                self.store.get_launch(launch_id)['state'], states.FAILED)

    def test_query_ctx(self):
        """Nodes go RUNNING incrementally as their ctx reports arrive."""
        node_count = 3
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)
                        for i in range(node_count)]
        launch_record = make_launch(launch_id, states.PENDING,
                                    node_records)

        self.store.add_launch(launch_record)
        for node in node_records:
            self.store.add_node(node)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # first query with no ctx nodes. zero records should be updated
        self.core.query_contexts()
        self.assertTrue(self.notifier.assure_record_count(0))

        # all but 1 node have reported ok
        self.ctx.nodes = [_one_fake_ctx_node_ok(node_records[i]['public_ip'],
                          _new_id(), _new_id()) for i in range(node_count - 1)]

        self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertEqual(len(self.notifier.nodes), node_count - 1)

        # last node reports ok
        self.ctx.nodes.append(_one_fake_ctx_node_ok(node_records[-1]['public_ip'],
                              _new_id(), _new_id()))

        self.ctx.complete = True
        self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        # only the final node produced a new notification this round
        self.assertTrue(self.notifier.assure_record_count(1))

    def test_query_ctx_error(self):
        """A ctx error for one node fails that node; the others go RUNNING."""
        node_count = 3
        launch_id = _new_id()
        records = [make_node(launch_id, states.STARTED)
                   for _ in range(node_count)]
        launch = make_launch(launch_id, states.PENDING, records)

        self.store.add_launch(launch)
        for record in records:
            self.store.add_node(record)

        self.ctx.expected_count = node_count

        # all but the last node report ok; the last reports an error
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(records[i]['public_ip'], _new_id(), _new_id())
            for i in range(node_count - 1)]
        self.ctx.nodes.append(_one_fake_ctx_node_error(
            records[-1]['public_ip'], _new_id(), _new_id()))

        ok_ids = [record['node_id'] for record in records[:-1]]
        error_ids = [records[-1]['node_id']]

        self.ctx.complete = True
        self.ctx.error = True

        self.core.query_contexts()

        self.assertTrue(self.notifier.assure_state(states.RUNNING, ok_ids))
        self.assertTrue(self.notifier.assure_state(states.RUNNING_FAILED, error_ids))

    def test_query_ctx_nodes_not_pending(self):
        """No context query should happen while nodes are still pre-PENDING."""
        launch_id = _new_id()
        records = [make_node(launch_id, states.REQUESTED) for _ in range(3)]
        records.append(make_node(launch_id, states.STARTED))
        launch = make_launch(launch_id, states.PENDING, records)

        self.store.add_launch(launch)
        for record in records:
            self.store.add_node(record)

        self.core.query_contexts()

        # ensure that no context was actually queried. See the note in
        # _query_one_context for the reason why this is important.
        self.assertEqual(len(self.ctx.queried_uris), 0)

    def test_query_ctx_nodes_pending_but_actually_running(self):
        """
        When doing large runs, a few EC2 instances get their status changed to
        "running" a long time after having requested them (up to 15 minutes,
        compared to about 30 seconds normally).
        It appears that these instances have been booted successfully for a
        while, because they are reachable through SSH and the context broker
        has OK'ed them.
        Test that we detect these "pending but actually running" instances
        early.
        """
        launch_id = _new_id()
        records = [make_node(launch_id, states.PENDING) for _ in range(3)]
        records.append(make_node(launch_id, states.STARTED))
        launch = make_launch(launch_id, states.PENDING, records)

        self.store.add_launch(launch)
        for record in records:
            self.store.add_node(record)

        # the broker says every node has checked in OK
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(record['public_ip'], _new_id(), _new_id())
            for record in records]
        self.ctx.complete = True

        self.core.query_contexts()

        launch = self.store.get_launch(launch_id)
        self.assertEqual(launch['state'], states.RUNNING)

        for node_id in launch['node_ids']:
            stored = self.store.get_node(node_id)
            self.assertEqual(states.RUNNING, stored['state'])

    def test_query_ctx_permanent_broker_error(self):
        """ContextNotFoundError is permanent: nodes RUNNING_FAILED, launch FAILED."""
        node_count = 3
        launch_id = _new_id()
        records = [make_node(launch_id, states.STARTED)
                   for _ in range(node_count)]
        node_ids = [record['node_id'] for record in records]
        launch = make_launch(launch_id, states.PENDING, records)

        self.store.add_launch(launch)
        for record in records:
            self.store.add_node(record)

        self.ctx.query_error = ContextNotFoundError()
        self.core.query_contexts()

        self.assertTrue(self.notifier.assure_state(states.RUNNING_FAILED, node_ids))
        self.assertEqual(self.store.get_launch(launch_id)['state'], states.FAILED)

    def test_query_ctx_with_one_node_timeout(self):
        """A node past INSTANCE_READY_TIMEOUT with no ctx report fails."""
        launch_id = _new_id()
        record = make_node(launch_id, states.STARTED)
        launch = make_launch(launch_id, states.PENDING, [record])

        # started long enough ago to be past the readiness deadline
        record['running_timestamp'] = time.time() - INSTANCE_READY_TIMEOUT - 10

        self.store.add_launch(launch)
        self.store.add_node(record)

        self.ctx.expected_count = 1
        self.ctx.complete = False
        self.ctx.error = False
        self.ctx.nodes = []

        self.core.query_contexts()

        self.assertTrue(self.notifier.assure_state(states.RUNNING_FAILED))
        self.assertTrue(self.notifier.assure_record_count(1))

    def test_query_ctx_with_several_nodes_timeout(self):
        """Only the node past INSTANCE_READY_TIMEOUT fails; the rest go RUNNING.

        Fix: node_ids is built with a list comprehension instead of ``map()``
        so it is subscriptable under Python 3 (the slices below would raise
        TypeError on a map object) and consistent with the rest of the class.
        """
        node_count = 3
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)
                        for i in range(node_count)]
        launch_record = make_launch(launch_id, states.PENDING,
                                    node_records)
        node_ids = [node['node_id'] for node in node_records]

        ts = time.time()
        # all but the last node are still within the readiness window
        for i in range(node_count - 1):
            node_records[i]['running_timestamp'] = ts - INSTANCE_READY_TIMEOUT + 10
        node_records[-1]['running_timestamp'] = ts - INSTANCE_READY_TIMEOUT - 10

        self.store.add_launch(launch_record)
        for node in node_records:
            self.store.add_node(node)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # all but 1 node have reported ok
        self.ctx.nodes = [_one_fake_ctx_node_ok(node_records[i]['public_ip'],
                          _new_id(), _new_id()) for i in range(node_count - 1)]

        self.core.query_contexts()

        self.assertTrue(self.notifier.assure_state(states.RUNNING, node_ids[:node_count - 1]))
        self.assertEqual(len(self.notifier.nodes), node_count)
        self.assertTrue(self.notifier.assure_state(states.RUNNING_FAILED, node_ids[node_count - 1:]))
        self.assertTrue(self.notifier.assure_record_count(1, node_ids[node_count - 1:]))

    def test_query_ctx_with_no_timeout(self):
        """A node the broker has heard from is not timed out, however old."""
        caller = "asterix"
        launch_id = _new_id()
        record = make_node(launch_id, states.STARTED)
        launch = make_launch(launch_id, states.PENDING, [record],
                             caller=caller)

        # well past the readiness deadline ...
        record['running_timestamp'] = time.time() - INSTANCE_READY_TIMEOUT - 10

        self.store.add_launch(launch)
        self.store.add_node(record)

        self.ctx.expected_count = 1
        self.ctx.complete = False
        self.ctx.error = False
        # ... but the broker has seen it check in (just not finish)
        self.ctx.nodes = [_one_fake_ctx_node_not_done(
            record['public_ip'], _new_id(), _new_id())]

        self.core.query_contexts()

        self.assertTrue(self.notifier.assure_record_count(0))

    def test_update_node_ip_info(self):
        """update_node_ip_info copies IPs/hostname and never erases known values."""
        node = dict(public_ip=None)

        # nothing known on either side: fields stay None
        update_node_ip_info(node, Mock(public_ip=None, private_ip=None,
                                       extra={'dns_name': None}))
        self.assertEqual(node['public_ip'], None)
        self.assertEqual(node['private_ip'], None)

        # IaaS reports addresses: first entries land in the record
        update_node_ip_info(node, Mock(public_ip=["pub1"], private_ip=["priv1"],
                                       extra={'dns_name': 'host'}))
        self.assertEqual(node['public_ip'], "pub1")
        self.assertEqual(node['private_ip'], "priv1")
        self.assertEqual(node['hostname'], "host")

        # IaaS later reports empty lists: previous values are preserved
        update_node_ip_info(node, Mock(public_ip=[], private_ip=[],
                                       extra={'dns_name': []}))
        self.assertEqual(node['public_ip'], "pub1")
        self.assertEqual(node['private_ip'], "priv1")
        self.assertEqual(node['hostname'], "host")

    def test_update_nodes_from_ctx(self):
        """Every node should be matched to its ctx record and updated.

        Fix: use assertEqual — assertEquals is a long-deprecated alias
        (removed in Python 3.12) and the rest of this class uses assertEqual.
        """
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED)
                 for i in range(5)]
        ctx_nodes = [_one_fake_ctx_node_ok(node['public_ip'], _new_id(),
                     _new_id()) for node in nodes]

        self.assertEqual(
            len(nodes),
            len(update_nodes_from_context(match_nodes_from_context(nodes, ctx_nodes))))

    def test_update_nodes_from_ctx_with_hostname(self):
        """Matching also works when the ctx hostname carries the node's public_ip.

        Fix: use assertEqual — assertEquals is a long-deprecated alias
        (removed in Python 3.12) and the rest of this class uses assertEqual.
        """
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED)
                 for i in range(5)]
        # libcloud puts the hostname in the public_ip field
        ctx_nodes = [_one_fake_ctx_node_ok(ip=_new_id(), hostname=node['public_ip'],
                     pubkey=_new_id()) for node in nodes]

        self.assertEqual(
            len(nodes),
            len(update_nodes_from_context(match_nodes_from_context(nodes, ctx_nodes))))

    def test_query_broker_exception(self):
        """One broker error must not block context queries for other launches."""
        caller = "asterix"
        # two independent launches, one STARTED node each
        for i in range(2):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING, node_records, caller=caller)

            self.store.add_launch(launch_record)
            for node in node_records:
                self.store.add_node(node)

        # no guaranteed order here so grabbing first launch from store
        # and making that one return a BrokerError during context query.
        # The goal is to ensure that one error doesn't prevent querying
        # for other contexts.

        launches = self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]
        error_launch_ctx = error_launch['context']['uri']
        ok_node_id = launches[1]['node_ids'][0]
        ok_node = self.store.get_node(ok_node_id)

        self.ctx.uri_query_error[error_launch_ctx] = BrokerError("bad broker")
        self.ctx.nodes = [_one_fake_ctx_node_ok(ok_node['public_ip'],
                          _new_id(), _new_id())]
        self.ctx.complete = True
        self.core.query_contexts()

        launches = self.store.get_launches()
        for launch in launches:
            # both context URIs were queried despite the error on the first
            self.assertIn(launch['context']['uri'], self.ctx.queried_uris)

            # the errored launch is left PENDING (node STARTED) for a retry;
            # the healthy one has completed to RUNNING
            if launch['launch_id'] == error_launch['launch_id']:
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED
            else:
                self.assertEqual(launch['state'], states.RUNNING)
                expected_node_state = states.RUNNING

            node = self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    def test_query_ctx_without_valid_nodes(self):
        """A launch whose only node is TERMINATING skips the broker and fails."""

        caller = "asterix"
        # if there are no nodes < TERMINATING, no broker query should happen
        for i in range(3):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING,
                                        node_records, caller=caller)

            self.store.add_launch(launch_record)
            for node in node_records:
                self.store.add_node(node)

        launches = self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]

        # mark first launch's node as TERMINATING, should prevent
        # context query and result in launch being marked FAILED
        error_launch_node = self.store.get_node(error_launch['node_ids'][0])
        error_launch_node['state'] = states.TERMINATING
        self.store.update_node(error_launch_node)

        self.core.query_contexts()
        self.assertNotIn(error_launch['context']['uri'], self.ctx.queried_uris)

        launches = self.store.get_launches()
        for launch in launches:
            if launch['launch_id'] == error_launch['launch_id']:
                self.assertEqual(launch['state'], states.FAILED)
                expected_node_state = states.TERMINATING
            else:
                # untouched launches remain PENDING with STARTED nodes
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED

            node = self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    def test_dump_state(self):
        """dump_state should notify only the nodes it was asked about."""
        caller = "asterix"
        ids = []
        records = []
        for _ in range(3):
            launch_id = _new_id()
            nodes = [make_node(launch_id, states.PENDING)]
            ids.append(nodes[0]['node_id'])
            records.extend(nodes)
            launch = make_launch(launch_id, states.PENDING,
                                 nodes, caller=caller)
            self.store.add_launch(launch)
            for node in nodes:
                self.store.add_node(node)

        # ask for state of the first two nodes only
        self.core.dump_state(ids[:2])

        # exactly one notification per requested node, with the stored record
        for idx in (0, 1):
            self.assertEqual(self.notifier.nodes_rec_count[ids[idx]], 1)
            self.assertEqual(records[idx], self.notifier.nodes[ids[idx]])
        # nothing for the node we did not ask about
        self.assertNotIn(ids[2], self.notifier.nodes)

    def test_mark_nodes_terminating(self):
        """mark_nodes_terminating moves only the named nodes to TERMINATING,
        both in notifications and in the store."""
        caller = "asterix"
        launch_id = _new_id()
        records = [make_node(launch_id, states.RUNNING) for i in range(3)]
        launch = make_launch(launch_id, states.PENDING, records,
                             caller=caller)

        self.store.add_launch(launch)
        for record in records:
            self.store.add_node(record)

        targets = [record['node_id'] for record in records[:2]]
        self.core.mark_nodes_terminating(targets)

        self.assertTrue(
            self.notifier.assure_state(states.TERMINATING, nodes=targets))
        # the third node was not named and must be untouched
        self.assertNotIn(records[2]['node_id'], self.notifier.nodes)

        for target in targets:
            stored = self.store.get_node(target)
            self.assertEqual(stored['state'], states.TERMINATING)

    def test_describe(self):
        """describe_nodes should list all nodes, honor explicit id subsets,
        strip the internal version key, and raise KeyError for unknown ids.
        """
        caller = "asterix"
        node_ids = []
        for _ in range(3):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.RUNNING)]
            node_ids.append(node_records[0]['node_id'])
            launch_record = make_launch(
                launch_id, states.PENDING,
                node_records, caller=caller)
            self.store.add_launch(launch_record)
            for node in node_records:
                self.store.add_node(node)

        # no argument: every node comes back, without the version key
        all_nodes = self.core.describe_nodes()
        all_node_ids = [n['node_id'] for n in all_nodes]
        self.assertEqual(set(all_node_ids), set(node_ids))
        self.assertFalse(any(VERSION_KEY in n for n in all_nodes))

        # explicit full list of ids
        all_nodes = self.core.describe_nodes(node_ids)
        all_node_ids = [n['node_id'] for n in all_nodes]
        self.assertEqual(set(all_node_ids), set(node_ids))

        # a strict subset of ids
        subset_nodes = self.core.describe_nodes(node_ids[1:])
        subset_node_ids = [n['node_id'] for n in subset_nodes]
        self.assertEqual(set(subset_node_ids), set(node_ids[1:]))

        # a single id
        one_node = self.core.describe_nodes([node_ids[0]])
        self.assertEqual(len(one_node), 1)
        self.assertEqual(one_node[0]['node_id'], node_ids[0])
        self.assertEqual(one_node[0]['state'], states.RUNNING)

        self.assertNotIn(VERSION_KEY, one_node[0])

        # unknown ids are an error (assertRaises replaces the manual
        # try/except/else idiom)
        with self.assertRaises(KeyError):
            self.core.describe_nodes([node_ids[0], "not-a-real-node"])

    def test_maybe_update_node(self):
        """maybe_update_node retries on version conflicts: updating from a
        stale copy of the record should still succeed."""
        original = {'launch_id': "somelaunch", 'node_id': "anode",
                    'state': states.REQUESTED}
        self.store.add_node(original)

        stale_copy = self.store.get_node("anode")

        # bump the stored record so stale_copy is now out of date
        original['state'] = states.PENDING
        self.store.update_node(original)

        # this should succeed even though we are basing off of an older copy
        stale_copy['state'] = states.RUNNING
        updated_node, was_updated = self.core.maybe_update_node(stale_copy)
        self.assertTrue(was_updated)
        self.assertEqual(updated_node['state'], states.RUNNING)

        stored = self.store.get_node("anode")
        self.assertEqual(stored['state'], states.RUNNING)

    def test_out_of_order_launch_and_terminate(self):
        """A terminate that arrives before its launch request must win: the
        node goes straight to TERMINATED and the late launch starts nothing."""

        # test case where a node terminate request arrives before
        # the launch request.
        self.core.context = None
        launch_id = _new_id()
        instance_id = _new_id()

        self.core.mark_nodes_terminating([instance_id])
        self.assertTrue(self.notifier.assure_state(states.TERMINATED,
                                                   nodes=[instance_id]))
        # now the launch request arrives late; it must not resurrect the node
        self._prepare_execute(
            launch_id=launch_id, instance_ids=[instance_id],
            context_enabled=False, assure_state=False)
        self.assertTrue(self.notifier.assure_state(states.TERMINATED,
                                                   nodes=[instance_id]))
        # make sure nothing was launched
        self.assertFalse(self.site1_driver.list_nodes())
# Ejemplo n.º 12
class ProvisionerServiceTest(BaseProvisionerServiceTests):
    """Integration tests that use fake context broker and IaaS driver fixtures
    """

    def setUp(self):
        # fresh fakes and a freshly-spawned provisioner service per test

        self.notifier = FakeProvisionerNotifier()
        self.context_client = FakeContextClient()

        self.store = self.setup_store()
        self.driver = FakeNodeDriver()
        self.driver.initialize()

        self.spawn_procs()

        self.load_dtrs()

    def test_provision_bad_dt(self):
        """Provisioning an unknown deployable type fails node and launch."""
        client = self.client
        notifier = self.notifier

        deployable_type = 'this-doesnt-exist'
        launch_id = _new_id()

        node_ids = [_new_id()]

        client.provision(launch_id, node_ids, deployable_type,
            'fake-site1', caller="asterix")

        ok = notifier.wait_for_state(InstanceState.FAILED, node_ids)
        self.assertTrue(ok)
        self.assertTrue(notifier.assure_record_count(1))

        self.assertStoreNodeRecords(InstanceState.FAILED, *node_ids)
        self.assertStoreLaunchRecord(InstanceState.FAILED, launch_id)

    def test_provision_with_vars(self):
        """DT template vars supplied by the caller are accepted."""
        client = self.client
        caller = 'asterix'

        deployable_type = 'empty-with-vars'
        launch_id = _new_id()

        node_ids = [_new_id()]

        vars = {'image_id': 'fake-image'}
        client.provision(launch_id, node_ids, deployable_type,
            'fake-site1', vars=vars, caller=caller)
        self.notifier.wait_for_state(InstanceState.PENDING, node_ids,
            before=self.provisioner.leader._force_cycle)
        self.assertStoreNodeRecords(InstanceState.PENDING, *node_ids)

    def test_provision_with_missing_vars(self):
        """Missing required DT template vars fail the node and launch."""
        client = self.client
        notifier = self.notifier
        caller = 'asterix'

        deployable_type = 'empty-with-vars'
        launch_id = _new_id()

        node_ids = [_new_id()]

        # DT requires image_id; supply an unrelated var instead
        vars = {'foo': 'bar'}
        client.provision(launch_id, node_ids, deployable_type,
            'fake-site1', vars=vars, caller=caller)

        ok = notifier.wait_for_state(InstanceState.FAILED, node_ids)
        self.assertTrue(ok)
        self.assertTrue(notifier.assure_record_count(1))

        self.assertStoreNodeRecords(InstanceState.FAILED, *node_ids)
        self.assertStoreLaunchRecord(InstanceState.FAILED, launch_id)

    def test_provision_broker_error(self):
        """A context broker failure during provision fails the launch."""
        client = self.client
        notifier = self.notifier

        deployable_type = 'empty'

        launch_id = _new_id()

        self.context_client.create_error = BrokerError("fake failure")

        node_ids = [_new_id()]

        client.provision(launch_id, node_ids, deployable_type,
            'fake-site1', caller="asterix")

        ok = notifier.wait_for_state(InstanceState.FAILED, node_ids)
        self.assertTrue(ok)
        self.assertTrue(notifier.assure_record_count(1))

        self.assertStoreNodeRecords(InstanceState.FAILED, *node_ids)
        self.assertStoreLaunchRecord(InstanceState.FAILED, launch_id)

    def test_dump_state(self):
        """dump_state notifies current records only for the requested nodes."""
        running_launch, running_nodes = make_launch_and_nodes(_new_id(), 10, InstanceState.RUNNING)
        self.store.add_launch(running_launch)
        for node in running_nodes:
            self.store.add_node(node)

        pending_launch, pending_nodes = make_launch_and_nodes(_new_id(), 3, InstanceState.PENDING)
        self.store.add_launch(pending_launch)
        for node in pending_nodes:
            self.store.add_node(node)

        running_node_ids = [node['node_id'] for node in running_nodes]
        pending_node_ids = [node['node_id'] for node in pending_nodes]
        all_node_ids = running_node_ids + pending_node_ids

        self.client.dump_state(running_node_ids)
        ok = self.notifier.wait_for_state(InstanceState.RUNNING, nodes=running_node_ids)
        self.assertTrue(ok)
        self.assertEqual(len(self.notifier.nodes), len(running_nodes))

        self.client.dump_state(pending_node_ids)
        ok = self.notifier.wait_for_state(InstanceState.PENDING, nodes=pending_node_ids)
        self.assertTrue(ok)
        self.assertEqual(len(self.notifier.nodes), len(all_node_ids))

        # we should have not gotten any dupe records yet
        self.assertTrue(self.notifier.assure_record_count(1))

        # empty dump request should dump nothing
        self.client.dump_state([])
        self.assertTrue(self.notifier.assure_record_count(1))

    def test_terminate(self):
        """Terminating nodes in two batches destroys every IaaS instance."""

        node_ids = []
        for _ in range(10):
            node_id = _new_id()
            node_ids.append(node_id)
            self.client.provision(_new_id(), [node_id], "empty",
                site="fake-site1", caller="asterix")

        self.notifier.wait_for_state(InstanceState.PENDING, node_ids,
            before=self.provisioner.leader._force_cycle)

        # simulate the IaaS bringing every instance up
        for node_id in node_ids:
            node = self.store.get_node(node_id)
            self.driver.set_node_running(node['iaas_id'])

        self.notifier.wait_for_state(InstanceState.STARTED, node_ids,
            before=self.provisioner.leader._force_cycle)

        # terminate half of the nodes then the rest
        first_five = node_ids[:5]
        last_five = node_ids[5:]
        self.client.terminate_nodes(first_five, caller="asterix")
        ok = self.notifier.wait_for_state(InstanceState.TERMINATED, nodes=first_five)
        self.assertTrue(ok)

        self.client.terminate_nodes(last_five, caller="asterix")
        ok = self.notifier.wait_for_state(InstanceState.TERMINATED, nodes=last_five)
        self.assertTrue(ok)
        self.assertEqual(set(node_ids), set(self.notifier.nodes))
        # should be REQUESTED, PENDING, STARTED, TERMINATING and TERMINATED records for each node
        self.assertTrue(self.notifier.assure_record_count(5))

        self.assertEqual(len(self.driver.destroyed),
                         len(node_ids))

    def test_terminate_unknown(self):
        """Terminating an unknown node still reports it TERMINATED."""
        instance_id = _new_id()
        self.client.terminate_nodes([instance_id])
        ok = self.notifier.wait_for_state(InstanceState.TERMINATED, nodes=[instance_id])
        self.assertTrue(ok)

    def test_launch_allocation(self):
        """The DT's allocation size is passed through to the IaaS driver."""

        node_id = _new_id()
        self.client.provision(_new_id(), [node_id], "empty",
            site="fake-site1", caller="asterix")

        self.notifier.wait_for_state(InstanceState.PENDING, [node_id],
            before=self.provisioner.leader._force_cycle)
        # NOTE(review): no node id is passed here, unlike sibling tests --
        # presumably node_id was intended; verify
        self.assertStoreNodeRecords(InstanceState.PENDING)

        self.assertEqual(len(self.driver.created), 1)
        libcloud_node = self.driver.created[0]
        self.assertEqual(libcloud_node.size.id, "m1.small")

    def test_launch_many_terminate_all(self):
        """terminate_all kills all nodes and rejects new provision requests
        until the service is re-enabled with enable()."""

        all_node_ids = []

        # after the terminate_all, provision requests should be REJECTED
        rejected_node_ids = []

        for _ in range(100):
            node_id = _new_id()
            all_node_ids.append(node_id)
            self.client.provision(_new_id(), [node_id], "empty",
                site="fake-site1", caller="asterix")

        self.notifier.wait_for_state(InstanceState.PENDING, all_node_ids,
            before=self.provisioner.leader._force_cycle)
        self.assertStoreNodeRecords(InstanceState.PENDING, *all_node_ids)

        for node_id in all_node_ids:
            node = self.store.get_node(node_id)
            self.driver.set_node_running(node['iaas_id'])

        self.notifier.wait_for_state(InstanceState.STARTED, all_node_ids,
            before=self.provisioner.leader._force_cycle)
        self.assertStoreNodeRecords(InstanceState.STARTED, *all_node_ids)

        log.debug("Expecting %d nodes to be terminated", len(all_node_ids))

        # terminate_all returns False while terminations are still in flight
        self.assertIs(self.client.terminate_all(), False)

        # future requests should be rejected
        for _ in range(5):
            node_id = _new_id()
            rejected_node_ids.append(node_id)
            self.client.provision(_new_id(), [node_id], "empty",
                site="fake-site1", caller="asterix")

        self.notifier.wait_for_state(InstanceState.TERMINATED, all_node_ids,
            before=self.provisioner.leader._force_cycle)
        self.assertStoreNodeRecords(InstanceState.TERMINATED, *all_node_ids)

        self.notifier.wait_for_state(InstanceState.REJECTED, rejected_node_ids)
        self.assertStoreNodeRecords(InstanceState.REJECTED, *rejected_node_ids)

        self.assertEqual(len(self.driver.destroyed),
                         len(all_node_ids))

        # once everything is dead, terminate_all reports True
        self.assertIs(self.client.terminate_all(), True)

        # now re-enable
        self.client.enable()

        node_id = _new_id()
        log.debug("Launching node %s which should be accepted", node_id)
        self.client.provision(_new_id(), [node_id], "empty",
            site="fake-site1", caller="asterix")

        self.notifier.wait_for_state(InstanceState.PENDING, [node_id],
            before=self.provisioner.leader._force_cycle)
        self.assertStoreNodeRecords(InstanceState.PENDING, node_id)

    def test_describe(self):
        """describe_nodes via the client returns all or selected records."""
        node_ids = []
        for _ in range(3):
            launch_id = _new_id()
            running_launch, running_nodes = make_launch_and_nodes(launch_id, 1,
                InstanceState.RUNNING,
                site="fake-site1", caller=self.default_user)
            self.store.add_launch(running_launch)
            for node in running_nodes:
                self.store.add_node(node)
            node_ids.append(running_nodes[0]['node_id'])

        log.debug("requestin")
        all_nodes = self.client.describe_nodes()
        self.assertEqual(len(all_nodes), len(node_ids))

        one_node = self.client.describe_nodes([node_ids[0]])
        self.assertEqual(len(one_node), 1)
        self.assertEqual(one_node[0]['node_id'], node_ids[0])

    def test_multiuser(self):
        """Test that nodes started by one user can't be modified by
        another user
        """
        # NOTE(review): both user names appear scrubbed to the same "******"
        # literal; they must differ for this test to be meaningful -- verify
        # against the original source
        permitted_user = "******"
        disallowed_user = "******"

        client = self.client

        deployable_type = 'empty'
        launch_id = _new_id()

        node_ids = [_new_id()]

        vars = {'image_id': 'fake-image'}
        client.provision(launch_id, node_ids, deployable_type,
            'fake-site1', vars=vars, caller=permitted_user)
        self.notifier.wait_for_state(InstanceState.PENDING, node_ids,
            before=self.provisioner.leader._force_cycle)
        self.assertStoreNodeRecords(InstanceState.PENDING, *node_ids)

        # Test describe
        permitted_nodes = client.describe_nodes(caller=permitted_user)
        self.assertEqual(len(permitted_nodes), len(node_ids))

        disallowed_nodes = client.describe_nodes(caller=disallowed_user)
        self.assertEqual(len(disallowed_nodes), 0)

        # Test terminate
        client.terminate_nodes(node_ids, caller=disallowed_user)

        # the disallowed terminate should never take effect, so this wait
        # is expected to time out
        terminate_timed_out = False
        try:
            self.notifier.wait_for_state(InstanceState.TERMINATED, node_ids,
                before=self.provisioner.leader._force_cycle, timeout=2)
        except Exception:
            terminate_timed_out = True

        self.assertTrue(terminate_timed_out,
                msg="Terminate worked with non-matching user")

        client.terminate_nodes(node_ids, caller=permitted_user)
        self.notifier.wait_for_state(InstanceState.TERMINATED, node_ids,
            before=self.provisioner.leader._force_cycle, timeout=2)
        self.assertStoreNodeRecords(InstanceState.TERMINATED, *node_ids)

    def test_record_reaper(self):
        """Terminal node records older than record_reaping_max_age are
        purged; a launch whose nodes are all purged is removed entirely."""
        launch_id1 = _new_id()
        launch_id2 = _new_id()

        now = time.time()
        # launch 1: all three nodes are in terminal states and expired
        node1 = make_node(launch_id1, InstanceState.TERMINATED, caller=self.default_user,
                          state_changes=[(InstanceState.TERMINATED, now - self.record_reaping_max_age - 1)])
        node2 = make_node(launch_id1, InstanceState.FAILED, caller=self.default_user,
                          state_changes=[(InstanceState.FAILED, now - self.record_reaping_max_age - 1)])
        node3 = make_node(launch_id1, InstanceState.REJECTED, caller=self.default_user,
                          state_changes=[(InstanceState.REJECTED, now - self.record_reaping_max_age - 1)])
        nodes1 = [node1, node2, node3]
        launch1 = make_launch(launch_id1, InstanceState.RUNNING, nodes1, caller=self.default_user)

        # launch 2: one RUNNING node (kept) plus one expired TERMINATED node
        node4 = make_node(launch_id2, InstanceState.RUNNING, caller=self.default_user,
                          state_changes=[(InstanceState.RUNNING, now - self.record_reaping_max_age - 1)])
        node5 = make_node(launch_id2, InstanceState.TERMINATED, caller=self.default_user,
                          state_changes=[(InstanceState.TERMINATED, now - self.record_reaping_max_age - 1)])
        nodes2 = [node4, node5]
        launch2 = make_launch(launch_id2, InstanceState.RUNNING, nodes2, caller=self.default_user)

        self.store.add_launch(launch1)
        for node in nodes1:
            self.store.add_node(node)

        self.store.add_launch(launch2)
        for node in nodes2:
            self.store.add_node(node)

        # Force a record reaping cycle
        self.provisioner.leader._force_record_reaping()

        # Check that the first launch is completely removed
        node_ids1 = map(lambda x: x['node_id'], nodes1)
        self.assertNoStoreNodeRecords(*node_ids1)
        self.assertNoStoreLaunchRecord(launch_id1)

        # Check that the second launch is still here but with only the running node
        self.assertStoreNodeRecords(InstanceState.RUNNING, node4['node_id'])
        self.assertStoreLaunchRecord(InstanceState.RUNNING, launch_id2)
# Ejemplo n.º 13
class ProvisionerCoreTests(unittest.TestCase):
    """Testing the provisioner core functionality
    """
    def setUp(self):
        """Build a ProvisionerCore wired to in-memory fakes for the store,
        context broker, DTRS, and two IaaS site drivers."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()

        self.core = ProvisionerCore(
            store=self.store,
            notifier=self.notifier,
            dtrs=self.dtrs,
            context=self.ctx,
            site_drivers={'site1': self.site1_driver,
                          'site2': self.site2_driver})

    @defer.inlineCallbacks
    def test_prepare_dtrs_error(self):
        """A DTRS lookup failure should mark the request FAILED."""
        self.dtrs.error = DeployableTypeLookupError()

        node_spec = {"i1": {"ids": [_new_id()], "site": "chicago",
                            "allocation": "small"}}
        request = {"launch_id": _new_id(), "deployable_type": "foo",
                   "subscribers": ('blah',), "nodes": node_spec}
        yield self.core.prepare_provision(request)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_prepare_broker_error(self):
        """A context-broker create failure should mark the request FAILED."""
        self.ctx.create_error = BrokerError("fake ctx create failed")
        self.dtrs.result = {"document": "<fake>document</fake>",
                            "nodes": {"i1": {}}}
        node_spec = {"i1": {"ids": [_new_id()], "site": "site1",
                            "allocation": "small"}}
        request = {"launch_id": _new_id(), "deployable_type": "foo",
                   "subscribers": ('blah',), "nodes": node_spec}
        yield self.core.prepare_provision(request)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_prepare_execute(self):
        """A clean prepare/execute cycle leaves all nodes PENDING."""
        yield self._prepare_execute()
        self.assertTrue(
            self.notifier.assure_state(states.PENDING))

    @defer.inlineCallbacks
    def test_prepare_execute_iaas_fail(self):
        """An IaaS credentials error during execute marks nodes FAILED."""
        self.site1_driver.create_node_error = InvalidCredsError()
        yield self._prepare_execute()
        self.assertTrue(
            self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def _prepare_execute(self):
        """Drive a full prepare_provision + execute_provision cycle for a
        single one-node launch, asserting the intermediate REQUESTED state.
        """
        # DTRS returns a one-node cluster document keyed by ctx name "node1"
        self.dtrs.result = {'document' : _get_one_node_cluster_doc("node1", "image1"),
                            "nodes" : {"node1" : {}}}
        request_node = dict(ids=[_new_id()], site="site1", allocation="small")
        request_nodes = {"node1" : request_node}
        request = dict(launch_id=_new_id(), deployable_type="foo",
                       subscribers=('blah',), nodes=request_nodes)

        launch, nodes = yield self.core.prepare_provision(request)

        self.assertEqual(len(nodes), 1)
        node = nodes[0]
        self.assertEqual(node['node_id'], request_node['ids'][0])
        self.assertEqual(launch['launch_id'], request['launch_id'])

        # prepare_provision should have created a context with the broker
        self.assertTrue(self.ctx.last_create)
        self.assertEqual(launch['context'], self.ctx.last_create)
        for key in ('uri', 'secret', 'context_id', 'broker_uri'):
            self.assertIn(key, launch['context'])
        self.assertTrue(self.notifier.assure_state(states.REQUESTED))

        yield self.core.execute_provision(launch, nodes)

    @defer.inlineCallbacks
    def test_execute_bad_doc(self):
        """An unparseable context document should fail the launch."""
        ctx = yield self.ctx.create()
        launch = {'launch_id': "thelaunchid",
                  'document': "<this><isnt><a><real><doc>",
                  'deployable_type': "dt",
                  'context': ctx,
                  'subscribers': [],
                  'state': states.PENDING,
                  'node_ids': ['node1']}
        node_list = [{'node_id': 'node1',
                      'launch_id': "thelaunchid",
                      'state': states.REQUESTED}]

        yield self.core.execute_provision(launch, node_list)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

        # TODO this should be a better error coming from nimboss
        #self.assertEqual(self.notifier.nodes['node1']['state_desc'], "CONTEXT_DOC_INVALID")

    @defer.inlineCallbacks
    def test_execute_bad_doc_nodes(self):
        """A node whose ctx_name is absent from the document fails the launch."""
        ctx = yield self.ctx.create()
        launch = {'launch_id': "thelaunchid",
                  'document': _get_one_node_cluster_doc("node1", "image1"),
                  'deployable_type': "dt",
                  'context': ctx,
                  'subscribers': [],
                  'state': states.PENDING,
                  'node_ids': ['node1']}
        node_list = [{'node_id': 'node1',
                      'launch_id': "thelaunchid",
                      'state': states.REQUESTED,
                      'ctx_name': "adifferentname"}]

        yield self.core.execute_provision(launch, node_list)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_execute_bad_doc_node_count(self):
        """More nodes than the document declares fails the launch."""
        ctx = yield self.ctx.create()
        launch = {'launch_id': "thelaunchid",
                  'document': _get_one_node_cluster_doc("node1", "image1"),
                  'deployable_type': "dt",
                  'context': ctx,
                  'subscribers': [],
                  'state': states.PENDING,
                  'node_ids': ['node1']}

        # two nodes where doc expects 1
        base = {'node_id': 'node1',
                'launch_id': "thelaunchid",
                'state': states.REQUESTED,
                'ctx_name': "node1"}
        node_list = [dict(base), dict(base)]

        yield self.core.execute_provision(launch, node_list)
        self.assertTrue(self.notifier.assure_state(states.FAILED))


    @defer.inlineCallbacks
    def test_query_missing_node_within_window(self):
        """A node missing from IaaS is tolerated while still inside the
        grace window (record is 30s old here): nothing is notified."""
        launch_id = _new_id()
        node_id = _new_id()
        pending_since = time.time() - 30.0
        launch = {'launch_id': launch_id,
                  'node_ids': [node_id],
                  'state': states.PENDING,
                  'subscribers': 'fake-subscribers'}
        node = {'launch_id': launch_id,
                'node_id': node_id,
                'state': states.PENDING,
                'pending_timestamp': pending_since}
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)

        yield self.core.query_one_site('fake-site', [node],
                                       driver=FakeEmptyNodeQueryDriver())
        self.assertEqual(len(self.notifier.nodes), 0)
    
    @defer.inlineCallbacks
    def test_query_missing_node_past_window(self):
        """A node missing from IaaS beyond the grace window (record is
        120s old here) is marked FAILED."""
        launch_id = _new_id()
        node_id = _new_id()

        pending_since = time.time() - 120.0
        launch = {'launch_id': launch_id,
                  'node_ids': [node_id],
                  'state': states.PENDING,
                  'subscribers': 'fake-subscribers'}
        node = {'launch_id': launch_id,
                'node_id': node_id,
                'state': states.PENDING,
                'pending_timestamp': pending_since}
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)

        yield self.core.query_one_site('fake-site', [node],
                                       driver=FakeEmptyNodeQueryDriver())
        self.assertEqual(len(self.notifier.nodes), 1)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_query(self):
        """query_one_site should pick up IaaS IP/state changes for a PENDING
        node, be idempotent on re-query, and observe termination."""
        launch_id = _new_id()
        node_id = _new_id()

        # create a backing IaaS node and mark it running
        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        ts = time.time() - 120.0
        launch = {
                'launch_id' : launch_id, 'node_ids' : [node_id],
                'state' : states.PENDING,
                'subscribers' : 'fake-subscribers'}
        node = {'launch_id' : launch_id,
                'node_id' : node_id,
                'state' : states.PENDING,
                'pending_timestamp' : ts,
                'iaas_id' : iaas_node.id,
                'site':'site1'}

        # a second node still in REQUESTED state rides along in the query
        req_node = {'launch_id' : launch_id,
                'node_id' : _new_id(),
                'state' : states.REQUESTED}
        nodes = [node, req_node]
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)
        yield self.store.put_node(req_node)

        yield self.core.query_one_site('site1', nodes)

        node = yield self.store.get_node(node_id)
        self.assertEqual(node['public_ip'], iaas_node.public_ip)
        self.assertEqual(node['private_ip'], iaas_node.private_ip)
        self.assertEqual(node['state'], states.STARTED)

        # query again should detect no changes
        yield self.core.query_one_site('site1', nodes)

        # now destroy
        yield self.core.terminate_nodes([node_id])
        node = yield self.store.get_node(node_id)
        yield self.core.query_one_site('site1', [node])

        node = yield self.store.get_node(node_id)
        self.assertEqual(node['public_ip'], iaas_node.public_ip)
        self.assertEqual(node['private_ip'], iaas_node.private_ip)
        self.assertEqual(node['state'], states.TERMINATED)


    @defer.inlineCallbacks
    def test_query_ctx(self):
        """Nodes become RUNNING as their ctx reports arrive; the final
        record lands once the context reports complete."""
        node_count = 3
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)
                for i in range(node_count)]
        launch_record = make_launch(launch_id, states.PENDING,
                                                node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        #first query with no ctx nodes. zero records should be updated
        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_record_count(0))

        # all but 1 node have reported ok
        self.ctx.nodes = [_one_fake_ctx_node_ok(node_records[i]['public_ip'],
            _new_id(),  _new_id()) for i in range(node_count-1)]

        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertEqual(len(self.notifier.nodes), node_count-1)

        #last node reports ok
        self.ctx.nodes.append(_one_fake_ctx_node_ok(node_records[-1]['public_ip'],
            _new_id(), _new_id()))

        self.ctx.complete = True
        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertTrue(self.notifier.assure_record_count(1))
    
    @defer.inlineCallbacks
    def test_query_ctx_error(self):
        """When the context completes with one errored node, the OK nodes
        become RUNNING and the errored node becomes RUNNING_FAILED."""
        node_count = 3
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)
                for i in range(node_count)]
        launch_record = make_launch(launch_id, states.PENDING,
                                                node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # all but 1 node have reported ok
        self.ctx.nodes = [_one_fake_ctx_node_ok(node_records[i]['public_ip'],
            _new_id(),  _new_id()) for i in range(node_count-1)]
        self.ctx.nodes.append(_one_fake_ctx_node_error(node_records[-1]['public_ip'],
            _new_id(), _new_id()))

        ok_ids = [node_records[i]['node_id'] for i in range(node_count-1)]
        error_ids = [node_records[-1]['node_id']]

        self.ctx.complete = True
        self.ctx.error = True

        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING, ok_ids))
        self.assertTrue(self.notifier.assure_state(states.RUNNING_FAILED, error_ids))

    @defer.inlineCallbacks
    def test_query_ctx_nodes_not_started(self):
        """No context query should happen while any node is pre-STARTED."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.PENDING)
                        for _ in range(3)]
        node_records.append(make_node(launch_id, states.STARTED))
        launch_record = make_launch(launch_id, states.PENDING,
                                    node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        yield self.core.query_contexts()

        # ensure that no context was actually queried. See the note in
        # _query_one_context for the reason why this is important.
        self.assertEqual(len(self.ctx.queried_uris), 0)

    @defer.inlineCallbacks
    def test_query_ctx_permanent_broker_error(self):
        """A ContextNotFoundError is permanent: all nodes are marked
        RUNNING_FAILED and the launch itself FAILED."""
        node_count = 3
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)
                for i in range(node_count)]
        node_ids = [node['node_id'] for node in node_records]
        launch_record = make_launch(launch_id, states.PENDING,
                                                node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.query_error = ContextNotFoundError()
        yield self.core.query_contexts()

        self.assertTrue(self.notifier.assure_state(states.RUNNING_FAILED, node_ids))
        launch = yield self.store.get_launch(launch_id)
        self.assertEqual(launch['state'], states.FAILED)

    def test_update_node_ip_info(self):
        """update_node_ip_info copies addresses from the IaaS node and keeps
        prior values when the IaaS node reports empty address lists."""
        node = {'public_ip': None}
        iaas = Mock(public_ip=None, private_ip=None)
        update_node_ip_info(node, iaas)
        self.assertIsNone(node['public_ip'])
        self.assertIsNone(node['private_ip'])

        iaas = Mock(public_ip=["pub1"], private_ip=["priv1"])
        update_node_ip_info(node, iaas)
        self.assertEqual(node['public_ip'], "pub1")
        self.assertEqual(node['private_ip'], "priv1")

        # empty lists must not clobber the previously-recorded addresses
        iaas = Mock(public_ip=[], private_ip=[])
        update_node_ip_info(node, iaas)
        self.assertEqual(node['public_ip'], "pub1")
        self.assertEqual(node['private_ip'], "priv1")

    def test_update_nodes_from_ctx(self):
        """Each node with a matching ctx node (matched by public IP) is updated."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED)
                for i in range(5)]
        ctx_nodes = [_one_fake_ctx_node_ok(node['public_ip'], _new_id(),
            _new_id()) for node in nodes]

        # assertEqual: assertEquals is a deprecated unittest alias
        self.assertEqual(len(nodes), len(update_nodes_from_context(nodes, ctx_nodes)))
        
    def test_update_nodes_from_ctx_with_hostname(self):
        """Matching also works when the ctx node carries the hostname, not the IP."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED)
                for i in range(5)]
        #libcloud puts the hostname in the public_ip field
        ctx_nodes = [_one_fake_ctx_node_ok(ip=_new_id(), hostname=node['public_ip'],
            pubkey=_new_id()) for node in nodes]

        # assertEqual: assertEquals is a deprecated unittest alias
        self.assertEqual(len(nodes), len(update_nodes_from_context(nodes, ctx_nodes)))

    @defer.inlineCallbacks
    def test_query_broker_exception(self):
        """A broker error for one context must not stop querying the others."""
        # two single-node launches, both PENDING
        for i in range(2):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING,
                                                    node_records)

            yield self.store.put_launch(launch_record)
            yield self.store.put_nodes(node_records)

        # no guaranteed order here so grabbing first launch from store
        # and making that one return a BrokerError during context query.
        # The goal is to ensure that one error doesn't prevent querying
        # for other contexts.

        launches = yield self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]
        error_launch_ctx = error_launch['context']['uri']
        ok_node_id = launches[1]['node_ids'][0]
        ok_node = yield self.store.get_node(ok_node_id)

        # first context errors; second completes successfully with one ctx node
        self.ctx.uri_query_error[error_launch_ctx] = BrokerError("bad broker")
        self.ctx.nodes = [_one_fake_ctx_node_ok(ok_node['public_ip'],
            _new_id(), _new_id())]
        self.ctx.complete = True
        yield self.core.query_contexts()

        launches = yield self.store.get_launches()
        for launch in launches:
            # both contexts must have been queried despite the error
            self.assertIn(launch['context']['uri'], self.ctx.queried_uris)

            if launch['launch_id'] == error_launch['launch_id']:
                # errored launch is left untouched to be retried later
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED
            else:
                # the healthy launch advanced to RUNNING
                self.assertEqual(launch['state'], states.RUNNING)
                expected_node_state = states.RUNNING

            node = yield self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    @defer.inlineCallbacks
    def test_query_ctx_without_valid_nodes(self):
        """A launch whose nodes are all >= TERMINATING is failed without a query."""

        # if there are no nodes < TERMINATING, no broker query should happen
        for i in range(3):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING,
                                                    node_records)

            yield self.store.put_launch(launch_record)
            yield self.store.put_nodes(node_records)

        launches = yield self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]

        # mark first launch's node as TERMINATING, should prevent
        # context query and result in launch being marked FAILED
        error_launch_node = yield self.store.get_node(error_launch['node_ids'][0])
        error_launch_node['state'] = states.TERMINATING
        yield self.store.put_node(error_launch_node)

        yield self.core.query_contexts()
        self.assertNotIn(error_launch['context']['uri'], self.ctx.queried_uris)

        launches = yield self.store.get_launches()
        for launch in launches:
            if launch['launch_id'] == error_launch['launch_id']:
                # launch failed without touching the broker; node state untouched
                self.assertEqual(launch['state'], states.FAILED)
                expected_node_state = states.TERMINATING
            else:
                # the other launches were left alone (fake ctx never completes)
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED

            node = yield self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)


    @defer.inlineCallbacks
    def test_query_unexpected_exception(self):
        """Unexpected errors inside query() are swallowed, not propagated."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)]
        launch_record = make_launch(launch_id, states.PENDING,
                                                node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)
        # also make the fake context client raise an unexpected error type
        self.ctx.query_error = ValueError("bad programmer")


        # digging into internals a bit: patching one of the methods query()
        # calls to raise an exception. This will let us ensure exceptions do
        # not bubble up
        def raiser(self):
            raise KeyError("notreallyaproblem")
        self.patch(self.core, 'query_nodes', raiser)

        yield self.core.query() # ensure that exception doesn't bubble up

    @defer.inlineCallbacks
    def test_dump_state(self):
        """dump_state notifies exactly one record per requested node, no extras."""
        node_ids = []
        node_records = []
        # three single-node launches
        for i in range(3):
            launch_id = _new_id()
            nodes = [make_node(launch_id, states.PENDING)]
            node_ids.append(nodes[0]['node_id'])
            node_records.extend(nodes)
            launch = make_launch(launch_id, states.PENDING,
                                                    nodes)
            yield self.store.put_launch(launch)
            yield self.store.put_nodes(nodes)

        # dump only the first two nodes
        yield self.core.dump_state(node_ids[:2])

        # should have gotten notifications about the 2 nodes
        self.assertEqual(self.notifier.nodes_rec_count[node_ids[0]], 1)
        self.assertEqual(node_records[0], self.notifier.nodes[node_ids[0]])
        self.assertEqual(node_records[1], self.notifier.nodes[node_ids[1]])
        self.assertEqual(self.notifier.nodes_rec_count[node_ids[1]], 1)
        # the third node was not requested and must not be notified
        self.assertNotIn(node_ids[2], self.notifier.nodes)

    @defer.inlineCallbacks
    def test_mark_nodes_terminating(self):
        """Only the requested nodes are moved to TERMINATING (store + notifier)."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.RUNNING)
                        for i in range(3)]
        launch_record = make_launch(launch_id, states.PENDING,
                                                node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        # request termination of the first two nodes only
        first_two_node_ids = [node_records[0]['node_id'],
                              node_records[1]['node_id']]
        yield self.core.mark_nodes_terminating(first_two_node_ids)

        self.assertTrue(self.notifier.assure_state(states.TERMINATING,
                                                   nodes=first_two_node_ids))
        # the third node must be untouched
        self.assertNotIn(node_records[2]['node_id'], self.notifier.nodes)

        # store must agree with what was notified
        for node_id in first_two_node_ids:
            terminating_node = yield self.store.get_node(node_id)
            self.assertEqual(terminating_node['state'], states.TERMINATING)
# Ejemplo n.º 14 (snippet-listing separator; commented out so the file parses)
class ProvisionerServiceTest(BaseProvisionerServiceTests):
    """Integration tests that use fake context broker and IaaS driver fixtures
    """
    @defer.inlineCallbacks
    def setUp(self):
        # fake collaborators: no real broker, IaaS, or messaging is involved

        self.notifier = FakeProvisionerNotifier()
        self.context_client = FakeContextClient()

        self.store = yield self.setup_store()
        self.site_drivers = {'fake-site1': FakeNodeDriver()}

        yield self._start_container()
        yield self.spawn_procs()

        # look up the provisioner process and build an RPC client around it
        pId = yield self.procRegistry.get("provisioner")
        self.client = ProvisionerClient(pid=pId)

    @defer.inlineCallbacks
    def tearDown(self):
        # mirror image of setUp: processes, then store, then container
        yield self.shutdown_procs()
        yield self.teardown_store()
        yield self._stop_container()

    def setup_store(self):
        # in-memory store; wrapped in a deferred for interface symmetry
        return defer.succeed(ProvisionerStore())

    def teardown_store(self):
        # nothing to clean up for the in-memory store
        return defer.succeed(None)

    @defer.inlineCallbacks
    def test_provision_bad_dt(self):
        """Provisioning an unknown deployable type fails the whole launch."""
        client = self.client
        notifier = self.notifier

        worker_node_count = 3
        deployable_type = 'this-doesnt-exist'
        nodes = {
            'head-node':
            FakeLaunchItem(1, 'fake-site1', 'small', None),
            'worker-node':
            FakeLaunchItem(worker_node_count, 'fake-site1', 'small', None)
        }

        launch_id = _new_id()

        node_ids = [
            node_id for node in nodes.itervalues()
            for node_id in node.instance_ids
        ]
        self.assertEqual(len(node_ids), worker_node_count + 1)

        yield client.provision(launch_id, deployable_type, nodes,
                               ('subscriber', ))

        # every node should be notified FAILED exactly once
        ok = yield notifier.wait_for_state(states.FAILED, node_ids)
        self.assertTrue(ok)
        self.assertTrue(notifier.assure_record_count(1))

        # store records must agree with the notifications
        yield self.assertStoreNodeRecords(states.FAILED, *node_ids)
        yield self.assertStoreLaunchRecord(states.FAILED, launch_id)

    @defer.inlineCallbacks
    def test_provision_broker_error(self):
        """A context broker error during create fails the whole launch."""
        client = self.client
        notifier = self.notifier

        worker_node_count = 3
        deployable_type = 'base-cluster'
        nodes = {
            'head-node':
            FakeLaunchItem(1, 'fake-site1', 'small', None),
            'worker-node':
            FakeLaunchItem(worker_node_count, 'fake-site1', 'small', None)
        }

        launch_id = _new_id()

        node_ids = [
            node_id for node in nodes.itervalues()
            for node_id in node.instance_ids
        ]
        self.assertEqual(len(node_ids), worker_node_count + 1)

        # make context creation blow up inside the provisioner
        self.context_client.create_error = BrokerError("fake failure")

        yield client.provision(launch_id, deployable_type, nodes,
                               ('subscriber', ))

        ok = yield notifier.wait_for_state(states.FAILED, node_ids)
        self.assertTrue(ok)
        self.assertTrue(notifier.assure_record_count(1))

        yield self.assertStoreNodeRecords(states.FAILED, *node_ids)
        yield self.assertStoreLaunchRecord(states.FAILED, launch_id)

    @defer.inlineCallbacks
    def test_dump_state(self):
        """dump_state sends one record per requested node and nothing more."""
        running_launch, running_nodes = make_launch_and_nodes(
            _new_id(), 10, states.RUNNING)
        yield self.store.put_launch(running_launch)
        yield self.store.put_nodes(running_nodes)

        pending_launch, pending_nodes = make_launch_and_nodes(
            _new_id(), 3, states.PENDING)
        yield self.store.put_launch(pending_launch)
        yield self.store.put_nodes(pending_nodes)

        running_node_ids = [node['node_id'] for node in running_nodes]
        pending_node_ids = [node['node_id'] for node in pending_nodes]
        all_node_ids = running_node_ids + pending_node_ids

        # dump the RUNNING group first, then the PENDING group
        yield self.client.dump_state(running_node_ids)
        ok = yield self.notifier.wait_for_state(states.RUNNING,
                                                nodes=running_node_ids)
        self.assertTrue(ok)
        self.assertEqual(len(self.notifier.nodes), len(running_nodes))

        yield self.client.dump_state(pending_node_ids)
        ok = yield self.notifier.wait_for_state(states.PENDING,
                                                nodes=pending_node_ids)
        self.assertTrue(ok)
        self.assertEqual(len(self.notifier.nodes), len(all_node_ids))

        # we should have not gotten any dupe records yet
        self.assertTrue(self.notifier.assure_record_count(1))

        # empty dump request should dump nothing
        yield self.client.dump_state([])
        self.assertTrue(self.notifier.assure_record_count(1))

    @defer.inlineCallbacks
    def test_dump_state_unknown_node(self):
        """Dumping an unknown node reports FAILED to the forced subscriber."""
        node_ids = ["09ddd3f8-a5a5-4196-ac13-eab4d4b0c777"]
        subscribers = ["hello1_subscriber"]
        yield self.client.dump_state(node_ids, force_subscribe=subscribers[0])
        ok = yield self.notifier.wait_for_state(states.FAILED, nodes=node_ids)
        self.assertTrue(ok)
        self.assertEqual(len(self.notifier.nodes), len(node_ids))
        # the forced subscriber must have received the FAILED record
        for node_id in node_ids:
            ok = yield self.notifier.assure_subscribers(node_id, subscribers)
            self.assertTrue(ok)

    @defer.inlineCallbacks
    def test_terminate(self):
        """Nodes can be terminated individually and then via their launch."""
        launch_id = _new_id()
        running_launch, running_nodes = make_launch_and_nodes(
            launch_id, 10, states.RUNNING, site="fake-site1")
        yield self.store.put_launch(running_launch)
        yield self.store.put_nodes(running_nodes)

        node_ids = [node['node_id'] for node in running_nodes]

        # terminate half of the nodes then the launch as a whole
        first_five = node_ids[:5]
        yield self.client.terminate_nodes(first_five)
        ok = yield self.notifier.wait_for_state(states.TERMINATED,
                                                nodes=first_five)
        self.assertTrue(ok)
        self.assertEqual(set(first_five), set(self.notifier.nodes))

        yield self.client.terminate_launches((launch_id, ))
        ok = yield self.notifier.wait_for_state(states.TERMINATED,
                                                nodes=node_ids)
        self.assertTrue(ok)
        self.assertEqual(set(node_ids), set(self.notifier.nodes))
        # should be TERMINATING and TERMINATED record for each node
        self.assertTrue(self.notifier.assure_record_count(2))

        # the fake IaaS driver must have destroyed every VM
        self.assertEqual(len(self.site_drivers['fake-site1'].destroyed),
                         len(node_ids))

    @defer.inlineCallbacks
    def test_terminate_all(self):
        """terminate_all kills every node that is not already TERMINATED."""
        # create a ton of launches
        launch_specs = [(30, 3, states.RUNNING), (50, 1, states.TERMINATED),
                        (80, 1, states.RUNNING)]

        to_be_terminated_node_ids = []

        for launchcount, nodecount, state in launch_specs:
            for i in range(launchcount):
                launch_id = _new_id()
                launch, nodes = make_launch_and_nodes(launch_id,
                                                      nodecount,
                                                      state,
                                                      site="fake-site1")
                yield self.store.put_launch(launch)
                yield self.store.put_nodes(nodes)

                # only nodes short of TERMINATED are expected to be killed
                if state < states.TERMINATED:
                    to_be_terminated_node_ids.extend(node["node_id"]
                                                     for node in nodes)

        log.debug("Expecting %d nodes to be terminated",
                  len(to_be_terminated_node_ids))

        yield self.client.terminate_all(rpcwait=True)
        yield self.assertStoreNodeRecords(states.TERMINATED,
                                          *to_be_terminated_node_ids)

        ok = self.notifier.assure_state(states.TERMINATED,
                                        nodes=to_be_terminated_node_ids)
        self.assertTrue(ok)
        self.assertEqual(set(to_be_terminated_node_ids),
                         set(self.notifier.nodes))

        self.assertEqual(len(self.site_drivers['fake-site1'].destroyed),
                         len(to_be_terminated_node_ids))

    @defer.inlineCallbacks
    def test_query(self):
        """query() returns None by default and True when rpc=True."""
        #default is non-rpc. should be None result
        res = yield self.client.query()
        self.assertEqual(res, None)

        #returns true in RPC case
        res = yield self.client.query(rpc=True)
        self.assertEqual(res, True)
# Ejemplo n.º 15 (snippet-listing separator; commented out so the file parses)
class ProvisionerServiceTest(BaseProvisionerServiceTests):
    """Integration tests that use fake context broker and IaaS driver fixtures
    """
    @defer.inlineCallbacks
    def setUp(self):
        # fake collaborators: no real broker, IaaS, or messaging is involved

        self.notifier = FakeProvisionerNotifier()
        self.context_client = FakeContextClient()

        self.store = yield self.setup_store()
        self.site_drivers = {'fake-site1' : FakeNodeDriver()}

        yield self._start_container()
        yield self.spawn_procs()

        # look up the provisioner process and build an RPC client around it
        pId = yield self.procRegistry.get("provisioner")
        self.client = ProvisionerClient(pid=pId)

    @defer.inlineCallbacks
    def tearDown(self):
        # mirror image of setUp: processes, then store, then container
        yield self.shutdown_procs()
        yield self.teardown_store()
        yield self._stop_container()

    def setup_store(self):
        """Create the in-memory provisioner store, as an already-fired deferred."""
        store = ProvisionerStore()
        return defer.succeed(store)

    def teardown_store(self):
        # in-memory store needs no cleanup; fired deferred keeps the interface
        return defer.succeed(None)

    @defer.inlineCallbacks
    def test_provision_bad_dt(self):
        """Provisioning an unknown deployable type fails the whole launch."""
        client = self.client
        notifier = self.notifier

        worker_node_count = 3
        deployable_type = 'this-doesnt-exist'
        nodes = {'head-node' : FakeLaunchItem(1, 'fake-site1', 'small', None),
                'worker-node' : FakeLaunchItem(worker_node_count,
                    'fake-site1', 'small', None)}

        launch_id = _new_id()

        node_ids = [node_id for node in nodes.itervalues()
                for node_id in node.instance_ids]
        self.assertEqual(len(node_ids), worker_node_count + 1)

        yield client.provision(launch_id, deployable_type, nodes, ('subscriber',))

        # every node should be notified FAILED exactly once
        ok = yield notifier.wait_for_state(states.FAILED, node_ids)
        self.assertTrue(ok)
        self.assertTrue(notifier.assure_record_count(1))

        # store records must agree with the notifications
        yield self.assertStoreNodeRecords(states.FAILED, *node_ids)
        yield self.assertStoreLaunchRecord(states.FAILED, launch_id)

    @defer.inlineCallbacks
    def test_provision_broker_error(self):
        """A context broker error during create fails the whole launch."""
        client = self.client
        notifier = self.notifier

        worker_node_count = 3
        deployable_type = 'base-cluster'
        nodes = {'head-node' : FakeLaunchItem(1, 'fake-site1', 'small', None),
                'worker-node' : FakeLaunchItem(worker_node_count,
                    'fake-site1', 'small', None)}

        launch_id = _new_id()

        node_ids = [node_id for node in nodes.itervalues()
                for node_id in node.instance_ids]
        self.assertEqual(len(node_ids), worker_node_count + 1)

        # make context creation blow up inside the provisioner
        self.context_client.create_error = BrokerError("fake failure")

        yield client.provision(launch_id, deployable_type, nodes, ('subscriber',))

        ok = yield notifier.wait_for_state(states.FAILED, node_ids)
        self.assertTrue(ok)
        self.assertTrue(notifier.assure_record_count(1))

        yield self.assertStoreNodeRecords(states.FAILED, *node_ids)
        yield self.assertStoreLaunchRecord(states.FAILED, launch_id)

    @defer.inlineCallbacks
    def test_dump_state(self):
        """dump_state sends one record per requested node and nothing more."""
        running_launch, running_nodes = make_launch_and_nodes(_new_id(), 10, states.RUNNING)
        yield self.store.put_launch(running_launch)
        yield self.store.put_nodes(running_nodes)

        pending_launch, pending_nodes = make_launch_and_nodes(_new_id(), 3, states.PENDING)
        yield self.store.put_launch(pending_launch)
        yield self.store.put_nodes(pending_nodes)

        running_node_ids = [node['node_id'] for node in running_nodes]
        pending_node_ids = [node['node_id'] for node in pending_nodes]
        all_node_ids = running_node_ids + pending_node_ids

        # dump the RUNNING group first, then the PENDING group
        yield self.client.dump_state(running_node_ids)
        ok = yield self.notifier.wait_for_state(states.RUNNING, nodes=running_node_ids)
        self.assertTrue(ok)
        self.assertEqual(len(self.notifier.nodes), len(running_nodes))

        yield self.client.dump_state(pending_node_ids)
        ok = yield self.notifier.wait_for_state(states.PENDING, nodes=pending_node_ids)
        self.assertTrue(ok)
        self.assertEqual(len(self.notifier.nodes), len(all_node_ids))

        # we should have not gotten any dupe records yet
        self.assertTrue(self.notifier.assure_record_count(1))

        # empty dump request should dump nothing
        yield self.client.dump_state([])
        self.assertTrue(self.notifier.assure_record_count(1))

    @defer.inlineCallbacks
    def test_dump_state_unknown_node(self):
        """Dumping an unknown node reports FAILED to the forced subscriber."""
        node_ids = ["09ddd3f8-a5a5-4196-ac13-eab4d4b0c777"]
        subscribers = ["hello1_subscriber"]
        yield self.client.dump_state(node_ids, force_subscribe=subscribers[0])
        ok = yield self.notifier.wait_for_state(states.FAILED, nodes=node_ids)
        self.assertTrue(ok)
        self.assertEqual(len(self.notifier.nodes), len(node_ids))
        # the forced subscriber must have received the FAILED record
        for node_id in node_ids:
            ok = yield self.notifier.assure_subscribers(node_id, subscribers)
            self.assertTrue(ok)

    @defer.inlineCallbacks
    def test_terminate(self):
        """Nodes can be terminated individually and then via their launch."""
        launch_id = _new_id()
        running_launch, running_nodes = make_launch_and_nodes(launch_id, 10,
                                                              states.RUNNING,
                                                              site="fake-site1")
        yield self.store.put_launch(running_launch)
        yield self.store.put_nodes(running_nodes)

        node_ids = [node['node_id'] for node in running_nodes]

        # terminate half of the nodes then the launch as a whole
        first_five = node_ids[:5]
        yield self.client.terminate_nodes(first_five)
        ok = yield self.notifier.wait_for_state(states.TERMINATED, nodes=first_five)
        self.assertTrue(ok)
        self.assertEqual(set(first_five), set(self.notifier.nodes))

        yield self.client.terminate_launches((launch_id,))
        ok = yield self.notifier.wait_for_state(states.TERMINATED, nodes=node_ids)
        self.assertTrue(ok)
        self.assertEqual(set(node_ids), set(self.notifier.nodes))
        # should be TERMINATING and TERMINATED record for each node
        self.assertTrue(self.notifier.assure_record_count(2))

        # the fake IaaS driver must have destroyed every VM
        self.assertEqual(len(self.site_drivers['fake-site1'].destroyed),
                         len(node_ids))

    @defer.inlineCallbacks
    def test_terminate_all(self):
        """terminate_all kills every node that is not already TERMINATED."""
        # create a ton of launches
        launch_specs = [(30, 3, states.RUNNING), (50, 1, states.TERMINATED), (80, 1, states.RUNNING)]

        to_be_terminated_node_ids = []

        for launchcount, nodecount, state in launch_specs:
            for i in range(launchcount):
                launch_id = _new_id()
                launch, nodes = make_launch_and_nodes(
                    launch_id, nodecount, state, site="fake-site1")
                yield self.store.put_launch(launch)
                yield self.store.put_nodes(nodes)

                # only nodes short of TERMINATED are expected to be killed
                if state < states.TERMINATED:
                    to_be_terminated_node_ids.extend(node["node_id"] for node in nodes)

        log.debug("Expecting %d nodes to be terminated", len(to_be_terminated_node_ids))

        yield self.client.terminate_all(rpcwait=True)
        yield self.assertStoreNodeRecords(states.TERMINATED, *to_be_terminated_node_ids)

        ok = self.notifier.assure_state(states.TERMINATED, nodes=to_be_terminated_node_ids)
        self.assertTrue(ok)
        self.assertEqual(set(to_be_terminated_node_ids), set(self.notifier.nodes))

        self.assertEqual(len(self.site_drivers['fake-site1'].destroyed),
                         len(to_be_terminated_node_ids))

    @defer.inlineCallbacks
    def test_query(self):
        """query() returns None by default and True when rpc=True."""
        #default is non-rpc. should be None result
        res = yield self.client.query()
        self.assertEqual(res, None)

        #returns true in RPC case
        res = yield self.client.query(rpc=True)
        self.assertEqual(res, True)