Beispiel #1
0
class ExecutionEngineAgentPyonIntTest(IonIntegrationTestCase):

    _webserver = None

    @needs_eeagent
    def setUp(self):
        """Start a container, load the r2cei rel and spawn the eeagent.

        The agent configuration is kept on ``self.agent_config`` so that
        individual tests can modify it and respawn the agent through
        ``_start_eeagent``.
        """
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2cei.yml")

        self._eea_name = "eeagent"
        self.resource_id = "eeagent_123456789"

        # Scratch directory passed to the pyon launch type; removed in tearDown.
        self.persistence_directory = tempfile.mkdtemp()

        launch_type = {"name": "pyon", "persistence_directory": self.persistence_directory}
        eeagent_section = {
            "heartbeat": 1,
            "slots": 100,
            "name": "pyon_eeagent",
            "launch_type": launch_type,
        }
        logging_section = {
            "loggers": {"eeagent": {"level": "DEBUG", "handlers": ["console"]}},
            "root": {"handlers": ["console"]},
        }
        self.agent_config = {
            "eeagent": eeagent_section,
            "agent": {"resource_id": self.resource_id},
            "logging": logging_section,
        }

        self._start_eeagent()

    def _start_eeagent(self):
        """Spawn an ExecutionEngineAgent process and build the clients for it.

        Stores the spawned process id in ``self._eea_pid`` and the
        ExecutionEngineAgentClient in ``self.eea_client``. ``self.container``
        is rebound to the instance returned by the container agent client.
        """
        client = ContainerAgentClient(node=self.container.node, name=self.container.name)
        self.container_client = client
        self.container = client._get_container_instance()

        # Spawn the agent using whatever self.agent_config currently holds.
        spawn_kwargs = dict(
            name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=self.agent_config,
        )
        self._eea_pid = client.spawn_process(**spawn_kwargs)
        log.info("Agent pid=%s.", str(self._eea_pid))

        # Resource agent client addressed by the agent's resource id, wrapped
        # by the eeagent-specific client the tests use.
        pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess())
        self._eea_pyon_client = pyon_client
        log.info("Got eea client %s.", str(pyon_client))

        self.eea_client = ExecutionEngineAgentClient(pyon_client)

    def tearDown(self):
        """Stop the helper webserver and the eeagent, then remove the temp dir.

        The persistence directory is removed in a ``finally`` clause so that a
        failure while stopping the webserver or terminating the agent does not
        leave the temporary directory behind.
        """
        try:
            self._stop_webserver()
            self.container.terminate_process(self._eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    def _start_webserver(self, directory_to_serve, port=None):
        """Start a webserver for testing code download.

        Tries up to 100 times to bind a server on localhost: first on the
        suggested port (8008 when ``port`` is None), then on randomly picked
        ports between 8000 and 10000. The server is stored in
        ``self._webserver`` and served from a gevent greenlet kept in
        ``self._web_glet``.

        NOTE(review): ``directory_to_serve`` is accepted but never used; the
        stock request handler presumably serves the current working directory,
        which the URLs built by the callers appear to rely on -- confirm before
        wiring this parameter in.

        Returns the port the server was actually bound to.
        Raises RuntimeError when no port could be bound.
        """

        class QuietHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
            # Subclass instead of patching the shared handler class, so that
            # silencing the request log does not leak outside this server.

            def log_message(self, format, *args):
                # swallow log messages
                pass

        class Server(HTTPServer):

            # Count of handle_request() calls completed; tests assert on it.
            requests = 0

            def serve_forever(self):
                self._serving = 1
                while self._serving:
                    self.handle_request()
                    self.requests += 1

            def stop(self):
                self._serving = 0

        if port is None:
            port = 8008

        for _ in range(100):
            try:
                self._webserver = Server(("localhost", port), QuietHandler)
            except socket.error:
                print("port %s is in use, picking another" % port)
                port = randint(8000, 10000)
            else:
                break
        else:
            # Every attempt failed; fail loudly here rather than with an
            # AttributeError on a missing server object below.
            raise RuntimeError("Unable to bind a test webserver port after 100 attempts")

        self._web_glet = gevent.spawn(self._webserver.serve_forever)
        return port

    def _stop_webserver(self):
        """Stop the test webserver, if one was started, and release its port.

        Killing the serving greenlet alone leaves the listening socket open,
        so the socket is closed explicitly and the reference is dropped.
        """
        if self._webserver is None:
            return
        self._web_glet.kill()
        # Close the listening socket so the port can be bound again.
        self._webserver.server_close()
        self._webserver = None

    def _enable_code_download(self, whitelist=None):
        """Respawn the eeagent with code download enabled.

        ``whitelist`` is stored as the ``whitelist`` entry of the agent's
        ``code_download`` configuration; an empty list is used when omitted.
        """
        allowed = [] if whitelist is None else whitelist

        # Terminate the running agent, update the config, then start a new one.
        self.container.terminate_process(self._eea_pid)
        self.agent_config["eeagent"]["code_download"] = {"enabled": True, "whitelist": allowed}
        self._start_eeagent()

    def wait_for_state(self, upid, desired_state, timeout=30):
        """Poll the eeagent until process ``upid`` reports ``desired_state``.

        ``dump_state`` is polled with a one second pause between polls, for at
        most ``timeout`` attempts. A ``dump_state`` call that raises Timeout is
        logged and counted as an attempt, so an unresponsive agent cannot keep
        this loop running forever.

        Fails the test if the desired state is not seen within the allowed
        number of attempts.
        """
        attempts = 0
        last_state = None
        while attempts < timeout:
            try:
                state = self.eea_client.dump_state().result
            except Timeout:
                log.warn("Timeout calling EEAgent dump_state. retrying.")
                # Count the failed call as an attempt; without this the loop
                # never terminates when every call times out.
                attempts += 1
                continue
            proc = get_proc_for_upid(state, upid)
            last_state = proc.get("state")
            if last_state == desired_state:
                return
            gevent.sleep(1)
            attempts += 1

        self.fail("Process %s took too long to get to %s, had %s" % (upid, desired_state, last_state))

    @needs_eeagent
    def test_basics(self):
        """Walk one process through launch, terminate and cleanup.

        After each step the number of processes listed by ``dump_state`` is
        checked: the process is still listed after termination and only
        disappears once it has been cleaned up.
        """
        upid, launch_round = "test0", 0
        launch_params = dict(name="test_x", module="ion.agents.cei.test.test_eeagent", cls="TestProcess")

        def process_count():
            # Number of processes in the eeagent's current state dump.
            return len(self.eea_client.dump_state().result["processes"])

        self.eea_client.launch_process(upid, launch_round, "pyon", launch_params)
        self.wait_for_state(upid, [500, "RUNNING"])
        assert process_count() == 1

        self.eea_client.terminate_process(upid, launch_round)
        self.wait_for_state(upid, [700, "TERMINATED"])
        assert process_count() == 1

        self.eea_client.cleanup_process(upid, launch_round)
        assert process_count() == 0

    @needs_eeagent
    def test_restart(self):
        """Relaunch a running process with an incremented round.

        Checks that the round reported by ``dump_state`` matches the new
        round, then terminates and cleans the process up.
        """
        upid, launch_round = "test0", 0
        launch_params = dict(name="test_x", module="ion.agents.cei.test.test_eeagent", cls="TestProcess")

        def dump():
            # Current state dump of the eeagent.
            return self.eea_client.dump_state().result

        self.eea_client.launch_process(upid, launch_round, "pyon", launch_params)
        self.wait_for_state(upid, [500, "RUNNING"])
        assert len(dump()["processes"]) == 1

        # Launch again with the next round; the eeagent should restart the process.
        launch_round += 1
        self.eea_client.launch_process(upid, launch_round, "pyon", launch_params)
        self.wait_for_state(upid, [500, "RUNNING"])

        reported_round = dump()["processes"][0]["round"]
        assert launch_round == int(reported_round)

        # TODO: a restart_process() round-trip (increment round, restart, wait
        # for RUNNING, compare rounds) belongs here; it is left out because the
        # restart op is disabled.

        self.eea_client.terminate_process(upid, launch_round)
        self.wait_for_state(upid, [700, "TERMINATED"])
        assert len(dump()["processes"]) == 1

        self.eea_client.cleanup_process(upid, launch_round)
        assert len(dump()["processes"]) == 0

    @needs_eeagent
    def test_failing_process(self):
        """Launch TestProcessFail and wait for it to reach the FAILED state.

        Afterwards the process is terminated and looked up once more in the
        state dump.
        """
        upid, launch_round = "testfail", 0
        launch_params = dict(name="test_x", module="ion.agents.cei.test.test_eeagent", cls="TestProcessFail")

        self.eea_client.launch_process(upid, launch_round, "pyon", launch_params)
        self.wait_for_state(upid, [850, "FAILED"])

        self.eea_client.terminate_process(upid, launch_round)
        get_proc_for_upid(self.eea_client.dump_state().result, upid)

    @needs_eeagent
    def test_slow_to_start(self):
        """Launch ten TestProcessSlowStart processes and wait for all to run.

        All processes are launched first; only then is each one awaited, with
        a 60-attempt limit per process.
        """
        # A real list (not a lazy map) because it is iterated twice below.
        upids = [uuid.uuid4().hex for _ in range(10)]
        round = 0
        run_type = "pyon"
        proc_name = "test_x"
        module = "ion.agents.cei.test.test_eeagent"
        cls = "TestProcessSlowStart"
        parameters = {"name": proc_name, "module": module, "cls": cls}
        for upid in upids:
            self.eea_client.launch_process(upid, round, run_type, parameters)

        for upid in upids:
            self.wait_for_state(upid, [500, "RUNNING"], timeout=60)

    @needs_eeagent
    def test_start_cancel(self):
        """Terminate a TestProcessSlowStart process while it is still PENDING."""
        upid = str(uuid.uuid4().hex)
        launch_round = 0
        launch_params = dict(name="test_x", module="ion.agents.cei.test.test_eeagent", cls="TestProcessSlowStart")

        self.eea_client.launch_process(upid, launch_round, "pyon", launch_params)
        self.wait_for_state(upid, [400, "PENDING"])

        self.eea_client.terminate_process(upid, launch_round)
        self.wait_for_state(upid, [700, "TERMINATED"])

    @needs_eeagent
    def test_kill_and_revive(self):
        """test_kill_and_revive
        Ensure that when an eeagent dies, it pulls the processes it owned from
        persistence, and marks them as failed, so the PD can figure out what to
        do with them
        """
        upid, launch_round = "test0", 0
        process_name = "test_transform"
        launch_params = dict(name=process_name, module="ion.agents.cei.test.test_eeagent", cls="TestProcess")

        self.eea_client.launch_process(upid, launch_round, "pyon", launch_params)
        self.wait_for_state(upid, [500, "RUNNING"])

        # Simulate a killed container: terminate the eeagent itself and the
        # process it started, then bring up a fresh eeagent.
        previous_pid = str(self._eea_pid)
        self.container.terminate_process(self._eea_pid)

        orphan = self.container.proc_manager.procs_by_name.get(process_name)
        self.assertIsNotNone(orphan)
        self.container.terminate_process(orphan.id)

        self._start_eeagent()
        self.assertNotEqual(previous_pid, self._eea_pid)

        self.wait_for_state(upid, [850, "FAILED"])

    @needs_eeagent
    def test_run_out_of_slots(self):
        """test_run_out_of_slots

        With a single slot configured, the first launch is expected to run and
        the second to be REJECTED. After the eeagent is respawned the first
        process is expected to be FAILED and the second still REJECTED.
        """

        def respawn_with_single_slot():
            # Replace the running eeagent by one configured with one slot and
            # check that a new agent pid was handed out.
            previous_pid = str(self._eea_pid)
            self.container.terminate_process(self._eea_pid)
            self.agent_config["eeagent"]["slots"] = 1
            self._start_eeagent()
            self.assertNotEqual(previous_pid, self._eea_pid)

        respawn_with_single_slot()

        first_upid, second_upid = "test0", "test1"
        launch_round = 0
        launch_params = dict(name="test_transform", module="ion.agents.cei.test.test_eeagent", cls="TestProcess")

        self.eea_client.launch_process(first_upid, launch_round, "pyon", launch_params)
        self.wait_for_state(first_upid, [500, "RUNNING"])

        self.eea_client.launch_process(second_upid, launch_round, "pyon", launch_params)
        self.wait_for_state(second_upid, [900, "REJECTED"])

        respawn_with_single_slot()

        self.wait_for_state(first_upid, [850, "FAILED"])
        self.wait_for_state(second_upid, [900, "REJECTED"])

    @needs_eeagent
    def test_download_code(self):
        """Launch a process whose module is fetched from a ``file://`` URI.

        A URI pointing at a non-existent file must be answered with status 404
        and an "Unable to download" message; the real URI must bring the
        process to RUNNING.
        """
        self._enable_code_download(whitelist=["*"])

        upid, launch_round = "test0", 0
        good_uri = "file://%s/downloads/module_to_download.py" % get_this_directory()
        missing_uri = "file:///tmp/notreal/module_to_download.py"

        def params_for(uri):
            # Launch parameters that differ only in the module URI.
            return {
                "name": "test_transform",
                "module": "ion.my.module.to.download",
                "module_uri": uri,
                "cls": "TestDownloadProcess",
            }

        response = self.eea_client.launch_process(upid, launch_round, "pyon", params_for(missing_uri))

        print(response)
        assert response.status == 404
        assert "Unable to download" in response.result

        self.eea_client.launch_process(upid, launch_round, "pyon", params_for(good_uri))
        self.wait_for_state(upid, [500, "RUNNING"])

        self.eea_client.terminate_process(upid, launch_round)
        get_proc_for_upid(self.eea_client.dump_state().result, upid)

    @needs_eeagent
    def test_whitelist(self):
        """Exercise the code_download whitelist against a local webserver.

        Steps, in order: download disabled (401, "Code download not enabled"),
        enabled with an empty whitelist (401), a non-matching entry (401), an
        exact "localhost" entry (process runs) and the "*" wildcard (process
        runs).
        """
        http_port = self._start_webserver(os.path.join(get_this_directory(), "downloads"), port=8910)

        while self._webserver is None:
            print("Waiting for webserver to come up")
            gevent.sleep(1)

        assert self._webserver.requests == 0

        upid, launch_round = "test0", 0
        launch_params = {
            "name": "test_transform",
            "module": "ion.my.module",
            "module_uri": "http://localhost:%s/ion/agents/cei/test/downloads/module_to_download.py" % http_port,
            "cls": "TestDownloadProcess",
        }

        def launch():
            # Issue the same launch request again and hand back the response.
            return self.eea_client.launch_process(upid, launch_round, "pyon", launch_params)

        def launch_run_and_terminate():
            # Launch, wait for RUNNING, terminate and look the process up.
            launch()
            self.wait_for_state(upid, [500, "RUNNING"])
            self.eea_client.terminate_process(upid, launch_round)
            get_proc_for_upid(self.eea_client.dump_state().result, upid)

        # Code download has not been enabled yet.
        response = launch()
        assert response.status == 401
        assert "Code download not enabled" in response.result

        # Enabled, but with an empty whitelist.
        self._enable_code_download()
        response = launch()
        print(response)
        assert response.status == 401
        assert "not in code_download whitelist" in response.result

        # Whitelist entry that does not match the host.
        self._enable_code_download(whitelist=["blork"])
        response = launch()
        assert response.status == 401
        assert "not in code_download whitelist" in response.result

        # Exact host match.
        self._enable_code_download(whitelist=["localhost"])
        launch_run_and_terminate()

        # Wildcard.
        self._enable_code_download(whitelist=["*"])
        launch_run_and_terminate()

    @needs_eeagent
    def test_caching(self):
        """Check how often the webserver is hit when launching with a module URI.

        The request counter of the test webserver must be 1 after the first
        download and must stay at 1 for a repeated launch, for a launch of a
        module that is already available locally, and a launch of a
        non-existent class from such a module is expected to end up FAILED.
        """
        http_port = self._start_webserver(os.path.join(get_this_directory(), "downloads"), port=8910)

        while self._webserver is None:
            print("Waiting for webserver to come up")
            gevent.sleep(1)

        self._enable_code_download(["*"])
        assert self._webserver.requests == 0

        launch_round = 0
        download_uri = "http://localhost:%s/ion/agents/cei/test/downloads/module_to_download.py" % http_port

        def launch_and_terminate(upid, launch_params, expected_state):
            # Launch, wait for the expected state, terminate, look the process up.
            self.eea_client.launch_process(upid, launch_round, "pyon", launch_params)
            self.wait_for_state(upid, expected_state)
            self.eea_client.terminate_process(upid, launch_round)
            get_proc_for_upid(self.eea_client.dump_state().result, upid)

        downloaded = {
            "name": "test_transform",
            "module": "ion.my.module",
            "module_uri": download_uri,
            "cls": "TestDownloadProcess",
        }

        # Launch a process, check that webserver is hit
        launch_and_terminate("test0", downloaded, [500, "RUNNING"])
        assert self._webserver.requests == 1

        # Launch another process, check that webserver is still only hit once
        launch_and_terminate("test0", downloaded, [500, "RUNNING"])
        assert self._webserver.requests == 1

        # Test that a module that is already available in tarball won't trigger a download
        already_available = {
            "name": "test_transformx",
            "module": "ion.agents.cei.test.test_eeagent",
            "module_uri": download_uri,
            "cls": "TestProcess",
        }
        launch_and_terminate("test5", already_available, [500, "RUNNING"])
        assert self._webserver.requests == 1

        # Test behaviour of a non-existent class with no download
        missing_class = {
            "name": "test_transformx",
            "module": "ion.agents.cei.test.test_eeagent",
            "module_uri": download_uri,
            "cls": "TestProcessNotReal",
        }
        launch_and_terminate("test9", missing_class, [850, "FAILED"])
Beispiel #2
0
class HeartbeaterIntTest(IonIntegrationTestCase):
    @needs_eeagent
    def setUp(self):
        """Start a container and prepare the eeagent configuration.

        The agent itself is not spawned here; the test calls
        ``_start_eeagent`` once its heartbeat subscriber is in place.
        """
        self._start_container()

        self._eea_name = "eeagent"
        self.resource_id = "eeagent_123456789"

        # Scratch directory passed to the pyon launch type; removed in tearDown.
        self.persistence_directory = tempfile.mkdtemp()

        launch_type = {"name": "pyon", "persistence_directory": self.persistence_directory}
        eeagent_section = {
            "heartbeat": "0.01",
            "slots": 100,
            "name": "pyon_eeagent",
            "launch_type": launch_type,
        }
        logging_section = {
            "loggers": {"eeagent": {"level": "DEBUG", "handlers": ["console"]}},
            "root": {"handlers": ["console"]},
        }
        self.agent_config = {
            "eeagent": eeagent_section,
            "agent": {"resource_id": self.resource_id},
            "logging": logging_section,
        }

    def _start_eeagent(self):
        """Spawn an ExecutionEngineAgent process and build the clients for it.

        Stores the spawned process id in ``self._eea_pid`` and the
        ExecutionEngineAgentClient in ``self.eea_client``. ``self.container``
        is rebound to the instance returned by the container agent client.
        """
        client = ContainerAgentClient(node=self.container.node, name=self.container.name)
        self.container_client = client
        self.container = client._get_container_instance()

        spawn_kwargs = dict(
            name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=self.agent_config,
        )
        self._eea_pid = client.spawn_process(**spawn_kwargs)
        log.info("Agent pid=%s.", str(self._eea_pid))

        # Resource agent client addressed by the agent's resource id, wrapped
        # by the eeagent-specific client.
        pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess())
        self._eea_pyon_client = pyon_client
        log.info("Got eea client %s.", str(pyon_client))

        self.eea_client = ExecutionEngineAgentClient(pyon_client)

    def tearDown(self):
        """Terminate the eeagent if one was spawned and remove the temp dir.

        setUp of this class does not spawn the agent (the test body does), so
        ``_eea_pid`` may not exist when the test failed early. The persistence
        directory is removed even when terminating the agent raises.
        """
        try:
            eea_pid = getattr(self, "_eea_pid", None)
            if eea_pid is not None:
                self.container.terminate_process(eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    @needs_eeagent
    @unittest.skipIf(os.getenv("CEI_LAUNCH_TEST", False), "Skip test while in CEI LAUNCH mode")
    def test_heartbeater(self):
        """test_heartbeater

        Test whether the eeagent waits until the eeagent listener is ready before sending
        a heartbeat to the PD
        """

        # One-element list used as a mutable flag: the nested callback cannot
        # rebind a local of the enclosing function in Python 2.7.
        # See: http://stackoverflow.com/questions/8934772/local-var-referenced-before-assignment
        beat_died = [False]

        def heartbeat_callback(heartbeat, headers):
            # On every heartbeat, try to reach the agent that sent it; any
            # error while doing so marks the beat as dead.
            eeagent_id = heartbeat["eeagent_id"]
            ee_client = ExecutionEngineAgentClient(
                SimpleResourceAgentClient(eeagent_id, name=eeagent_id, process=FakeProcess()),
                timeout=2,
            )

            try:
                ee_client.dump_state()
            except:
                log.exception("Heartbeat Failed!")
                beat_died[0] = True

        subscriber = HeartbeatSubscriber("heartbeat_queue", callback=heartbeat_callback, node=self.container.node)
        self.beat_subscriber = subscriber
        subscriber.start()
        try:
            self._start_eeagent()
            # Watch the flag for five half-second intervals.
            for _ in range(5):
                assert beat_died[0] is not True, "A Hearbeat callback wasn't able to contact the eeagent"
                gevent.sleep(0.5)
        finally:
            self.beat_subscriber.stop()
class ProcessDispatcherEEAgentIntTest(ProcessDispatcherServiceIntTest):
    """Run the basic int tests again, with a different environment
    """

    def setUp(self):
        """Start the process dispatcher and an eeagent in one container.

        Besides starting the ProcessDispatcherService app, this creates the
        process definition used by the tests, fires a fake ``dt_state``
        message at the PD's dashi binding and spawns the eeagent.
        """
        self._start_container()
        client = ContainerAgentClient(node=self.container.node, name=self.container.name)
        self.container_client = client
        self.container = client._get_container_instance()

        pd_app = dict(
            processapp=(
                "process_dispatcher",
                "ion.services.cei.process_dispatcher_service",
                "ProcessDispatcherService",
            )
        )
        self.container.start_app(pd_app, config=pd_config)

        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        definition = ProcessDefinition(name='test_process')
        self.process_definition = definition
        definition.executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess',
        }
        self.process_definition_id = self.pd_cli.create_process_definition(definition)
        self.event_queue = queue.Queue()

        self.event_sub = None

        self.resource_id = "eeagent_123456789"
        self._eea_name = "eeagent"

        # Scratch directory passed to the pyon launch type; removed in tearDown.
        self.persistence_directory = tempfile.mkdtemp()

        launch_type = {'name': 'pyon', 'persistence_directory': self.persistence_directory}
        self.agent_config = {
            'eeagent': {
                'heartbeat': 1,
                'heartbeat_queue': 'hbeatq',
                'slots': 100,
                'name': 'pyon_eeagent',
                'node_id': 'somenodeid',
                'launch_type': launch_type,
            },
            'agent': {'resource_id': self.resource_id},
        }

        # Send a fake dt_state message to PD's dashi binding.
        pd_section = pd_config['processdispatcher']
        dashi = get_dashi(uuid.uuid4().hex, pd_section['dashi_uri'], pd_section['dashi_exchange'])
        dt_state = {
            'node_id': "somenodeid",
            'state': InstanceState.RUNNING,
            'deployable_type': "eeagent_pyon",
        }
        dashi.fire(get_pd_dashi_name(), "dt_state", args=dt_state)

        self._eea_pid = client.spawn_process(
            name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=self.agent_config,
        )
        log.info('Agent pid=%s.', str(self._eea_pid))

    def _start_eeagent(self):
        """Spawn an ExecutionEngineAgent and remember its pid in ``_eea_pid``.

        ``self.container`` is rebound to the instance returned by a freshly
        built container agent client.
        """
        client = ContainerAgentClient(node=self.container.node, name=self.container.name)
        self.container_client = client
        self.container = client._get_container_instance()

        # Spawn the agent using the configuration prepared in setUp.
        spawn_kwargs = dict(
            name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=self.agent_config,
        )
        self._eea_pid = client.spawn_process(**spawn_kwargs)
        log.info('Agent pid=%s.', str(self._eea_pid))

    def tearDown(self):
        """Terminate the eeagent and release everything setUp created.

        The remaining cleanup (temp dir, event subscriber, container) runs in
        a ``finally`` clause so that a failure while terminating the agent
        does not skip it.
        """
        try:
            self.container.terminate_process(self._eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

            if self.event_sub:
                self.event_sub.stop()
            self._stop_container()
class ProcessDispatcherEEAgentIntTest(ProcessDispatcherServiceIntTest):
    """Run the basic int tests again, with a different environment
    """

    def setUp(self):
        """Start the process dispatcher plus one node/eeagent for "engine1".

        ``self.dashi`` is initialised to None first so tearDown can tell
        whether a dashi connection was ever opened.
        """
        self.dashi = None
        self._start_container()
        client = ContainerAgentClient(node=self.container.node, name=self.container.name)
        self.container_client = client
        self.container = client._get_container_instance()

        pd_app = dict(
            name="process_dispatcher",
            processapp=(
                "process_dispatcher",
                "ion.services.cei.process_dispatcher_service",
                "ProcessDispatcherService",
            ),
        )
        self.container.start_app(pd_app, config=pd_config)

        self.rr_cli = self.container.resource_registry

        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        definition = ProcessDefinition(name='test_process')
        self.process_definition = definition
        definition.executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess',
        }
        self.process_definition_id = self.pd_cli.create_process_definition(definition)

        # Bookkeeping for tearDown: spawned agent pids and their temp dirs.
        self._eea_pids = []
        self._tmpdirs = []

        pd_section = pd_config['processdispatcher']
        self.dashi = get_dashi(uuid.uuid4().hex, pd_section['dashi_uri'], pd_section['dashi_exchange'])

        # Send a fake node_state message to PD's dashi binding.
        self.node1_id = uuid.uuid4().hex
        self._send_node_state("engine1", self.node1_id)
        self._start_eeagent(self.node1_id)

        self.waiter = ProcessStateWaiter()

    def _send_node_state(self, engine_id, node_id=None):
        """Fire a RUNNING ``node_state`` message at the PD's dashi name.

        A random node id is generated when ``node_id`` is falsy. The domain id
        is derived from ``engine_id`` via ``domain_id_from_engine``.
        """
        if not node_id:
            node_id = uuid.uuid4().hex
        message = {
            'node_id': node_id,
            'state': InstanceState.RUNNING,
            'domain_id': domain_id_from_engine(engine_id),
        }
        self.dashi.fire(get_pd_dashi_name(), "node_state", args=message)

    def _start_eeagent(self, node_id):
        """Spawn an eeagent for ``node_id`` with its own persistence directory.

        The temporary directory and the spawned pid are recorded in
        ``self._tmpdirs`` and ``self._eea_pids`` for tearDown.
        """
        scratch_dir = tempfile.mkdtemp()
        self._tmpdirs.append(scratch_dir)

        spawn_kwargs = dict(
            name="eeagent",
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=_get_eeagent_config(node_id, scratch_dir),
        )
        agent_pid = self.container_client.spawn_process(**spawn_kwargs)
        log.info('Agent pid=%s.', str(agent_pid))
        self._eea_pids.append(agent_pid)

    def tearDown(self):
        """Terminate all spawned eeagents and release the test's resources.

        Removing the temp dirs, stopping the waiter and cancelling the dashi
        connection happen in a ``finally`` clause so that a failure while
        terminating an agent does not skip them.
        """
        try:
            for pid in self._eea_pids:
                self.container.terminate_process(pid)
        finally:
            for d in self._tmpdirs:
                shutil.rmtree(d)

            self.waiter.stop()
            if self.dashi:
                self.dashi.cancel()


    def test_requested_ee(self):
        """Schedule processes against explicitly requested execution engines.

        A process queued for "engine2" must wait until a node and eeagent for
        that engine appear; a NEVER-queued request for an unknown engine must
        be rejected; later requests for "engine2" must run.
        """
        definition_id = self.process_definition_id

        def build_schedule(queueing_mode, **target_fields):
            # ProcessSchedule aimed at a ProcessTarget built from target_fields.
            target = ProcessTarget(**target_fields)
            schedule = ProcessSchedule()
            schedule.queueing_mode = queueing_mode
            schedule.target = target
            return schedule

        def create_and_schedule(schedule):
            # Create a process from the shared definition and schedule it.
            new_pid = self.pd_cli.create_process(definition_id)
            self.pd_cli.schedule_process(definition_id, schedule, process_id=new_pid)
            return new_pid

        # request non-default engine
        queued_schedule = build_schedule(ProcessQueueingMode.ALWAYS, execution_engine_id="engine2")
        pid = self.pd_cli.create_process(definition_id)
        self.waiter.start()
        self.pd_cli.schedule_process(definition_id, queued_schedule, process_id=pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.WAITING)

        # request unknown engine, with NEVER queuing mode. The request
        # should be rejected.
        # verifies L4-CI-CEI-RQ52
        rejected_pid = create_and_schedule(
            build_schedule(ProcessQueueingMode.NEVER, execution_engine_id="not-a-real-ee"))
        self.waiter.await_state_event(rejected_pid, ProcessStateEnum.REJECTED)

        # now add a node and eeagent for engine2. original process should leave
        # queue and start running
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine2", node2_id)
        self._start_eeagent(node2_id)
        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # spawn another process. it should start immediately.
        pid2 = create_and_schedule(
            build_schedule(ProcessQueueingMode.NEVER, execution_engine_id="engine2"))
        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # one more with node exclusive
        pid3 = create_and_schedule(
            build_schedule(ProcessQueueingMode.NEVER, execution_engine_id="engine2",
                           node_exclusive="hats"))
        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # kill the processes for good
        for doomed in (pid, pid2, pid3):
            self.pd_cli.cancel_process(doomed)
            self.waiter.await_state_event(doomed, ProcessStateEnum.TERMINATED)

    def test_node_exclusive(self):
        """Check the ``node_exclusive`` scheduling constraint.

        The node_exclusive constraint is used to ensure multiple processes
        of the same "kind" each get a VM exclusive of each other. Other
        processes may run on these VMs, just not processes with the same
        node_exclusive tag. Since the contents of each node cannot be queried
        directly in this test, the capability is shown by scheduling
        processes one by one and checking their state.

        verifies L4-CI-CEI-RQ121
        verifies L4-CI-CEI-RQ57
        """
        definition_id = self.process_definition_id

        def create_and_schedule(schedule):
            # Create a process from the shared definition and schedule it.
            new_pid = self.pd_cli.create_process(definition_id)
            self.pd_cli.schedule_process(definition_id, schedule, process_id=new_pid)
            return new_pid

        # setUp() created a single node and eeagent. Two processes are
        # scheduled with the same "abc" node_exclusive tag; with only one
        # node the first should run and the second should be queued.
        target = ProcessTarget(execution_engine_id="engine1")
        target.node_exclusive = "abc"
        schedule = ProcessSchedule()
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        schedule.target = target

        pid1 = self.pd_cli.create_process(definition_id)
        self.waiter.start()
        self.pd_cli.schedule_process(definition_id, schedule, process_id=pid1)
        self.waiter.await_state_event(pid1, ProcessStateEnum.RUNNING)

        pid2 = create_and_schedule(schedule)
        self.waiter.await_state_event(pid2, ProcessStateEnum.WAITING)

        # now demonstrate that the node itself is not full by launching
        # a third process without a node_exclusive tag -- it should start
        # immediately. The target object attached to the schedule is
        # modified in place.
        target.node_exclusive = None
        pid3 = create_and_schedule(schedule)
        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # finally, add a second node to the engine. pid2 should be started
        # since there is an exclusive "abc" node free.
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine1", node2_id)
        self._start_eeagent(node2_id)
        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # kill the processes for good
        for doomed in (pid1, pid2, pid3):
            self.pd_cli.cancel_process(doomed)
            self.waiter.await_state_event(doomed, ProcessStateEnum.TERMINATED)

    def test_code_download(self):
        """Compare a process definition without a URL to one with a URL.

        Both definitions name the same module and class; the one without a
        URL is expected to end up FAILED, the one with a ``file://`` URL is
        expected to reach RUNNING.
        """
        # Definition that has no URL; only module and class.
        definition_no_url = ProcessDefinition(name='test_process_nodownload')
        definition_no_url.executable = {'module': 'ion.my.test.process', 'class': 'TestProcess'}
        definition_id_no_url = self.pd_cli.create_process_definition(definition_no_url)

        # Definition that carries the URL of a python file next to this one.
        # verifies L4-CI-CEI-RQ114
        url = "file://%s" % os.path.join(os.path.dirname(__file__), 'test_process_dispatcher.py')
        definition_with_url = ProcessDefinition(name='test_process_download')
        definition_with_url.executable = {
            'module': 'ion.my.test.process',
            'class': 'TestProcess',
            'url': url,
        }
        definition_id_with_url = self.pd_cli.create_process_definition(definition_with_url)

        target = ProcessTarget()
        schedule = ProcessSchedule()
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        schedule.target = target

        self.waiter.start()

        # Test a module with no download fails
        pid_no_url = self.pd_cli.create_process(definition_id_no_url)
        self.pd_cli.schedule_process(definition_id_no_url, schedule, process_id=pid_no_url)
        self.waiter.await_state_event(pid_no_url, ProcessStateEnum.FAILED)

        # Test a module with a URL runs
        pid = self.pd_cli.create_process(definition_id_with_url)
        self.pd_cli.schedule_process(definition_id_with_url, schedule, process_id=pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)
class ProcessDispatcherEEAgentIntTest(ProcessDispatcherServiceIntTest):
    """Run the basic int tests again, with a different environment
    """

    def setUp(self):
        """Start a container running the process dispatcher app, register a
        test process definition, connect to dashi, and bring up one node
        ("engine1") with an eeagent.
        """
        # initialised before anything else; tearDown checks it before
        # cancelling
        self.dashi = None
        self._start_container()
        from pyon.public import CFG

        started_container = self.container
        self.container_client = ContainerAgentClient(
            node=started_container.node, name=started_container.name)
        self.container = self.container_client._get_container_instance()

        pd_app = {
            'name': "process_dispatcher",
            'processapp': ("process_dispatcher",
                           "ion.services.cei.process_dispatcher_service",
                           "ProcessDispatcherService"),
        }
        self.container.start_app(pd_app, config=pd_config)

        self.rr_cli = self.container.resource_registry

        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        definition = ProcessDefinition(name='test_process')
        definition.executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess',
        }
        self.process_definition = definition
        self.process_definition_id = self.pd_cli.create_process_definition(definition)

        # bookkeeping for the eeagents started by this test case
        self._eea_pids = []
        self._eea_pid_to_resource_id = {}
        self._eea_pid_to_persistence_dir = {}
        self._tmpdirs = []

        dashi_cfg = pd_config['processdispatcher']
        self.dashi = get_dashi(uuid.uuid4().hex,
            dashi_cfg['dashi_uri'],
            dashi_cfg['dashi_exchange'],
            sysname=CFG.get_safe("dashi.sysname"))

        # send a fake node_state message to PD's dashi binding.
        self.node1_id = uuid.uuid4().hex
        self._send_node_state("engine1", self.node1_id)
        self._initial_eea_pid = self._start_eeagent(self.node1_id)

        self.waiter = ProcessStateWaiter()

    def _send_node_state(self, engine_id, node_id=None):
        """Fire a "node_state" dashi message reporting a RUNNING node.

        engine_id is turned into a domain id with domain_id_from_engine();
        when node_id is empty a random hex uuid is used instead.
        """
        if not node_id:
            node_id = uuid.uuid4().hex
        message = {
            'node_id': node_id,
            'state': InstanceState.RUNNING,
            'domain_id': domain_id_from_engine(engine_id),
        }
        self.dashi.fire(get_pd_dashi_name(), "node_state", args=message)

    def _start_eeagent(self, node_id, resource_id=None, persistence_dir=None):
        """Spawn an ExecutionEngineAgent process for node_id and return its pid.

        When the caller supplies no persistence_dir a new temporary
        directory is created and recorded in self._tmpdirs; when no
        resource_id is supplied a random hex uuid is used. The pid and both
        values are recorded in the per-agent bookkeeping attributes.
        """
        if not persistence_dir:
            persistence_dir = tempfile.mkdtemp()
            self._tmpdirs.append(persistence_dir)
        if not resource_id:
            resource_id = uuid.uuid4().hex

        spawn_kwargs = {
            'name': "eeagent",
            'module': "ion.agents.cei.execution_engine_agent",
            'cls': "ExecutionEngineAgent",
            'config': _get_eeagent_config(node_id, persistence_dir,
                resource_id=resource_id),
        }
        eea_pid = self.container_client.spawn_process(**spawn_kwargs)
        log.info('Agent pid=%s.', str(eea_pid))

        self._eea_pids.append(eea_pid)
        self._eea_pid_to_resource_id[eea_pid] = resource_id
        self._eea_pid_to_persistence_dir[eea_pid] = persistence_dir
        return eea_pid

    def _kill_eeagent(self, pid):
        """Terminate a tracked eeagent process and drop its bookkeeping.

        Fails the test if pid is not listed in self._eea_pids.
        """
        is_tracked = pid in self._eea_pids
        self.assertTrue(is_tracked)
        self.container.terminate_process(pid)
        self._eea_pids.remove(pid)
        for mapping in (self._eea_pid_to_resource_id,
                        self._eea_pid_to_persistence_dir):
            del mapping[pid]

    def tearDown(self):
        """Kill all tracked eeagents, remove temp dirs, stop the waiter and
        cancel the dashi connection.

        Each step runs even if an earlier one raised, so that a failure
        while killing an agent or removing a directory does not leave the
        waiter or the dashi connection active for later tests. An exception
        still propagates and fails the test.
        """
        try:
            # iterate over a copy: _kill_eeagent() removes from the list
            for pid in list(self._eea_pids):
                self._kill_eeagent(pid)
        finally:
            try:
                for tmpdir in self._tmpdirs:
                    shutil.rmtree(tmpdir)
            finally:
                try:
                    self.waiter.stop()
                finally:
                    if self.dashi:
                        self.dashi.cancel()

    def test_requested_ee(self):
        """Schedule processes against named execution engines and check
        queueing, rejection and start-up as a node for "engine2" is added.
        """

        def make_schedule(queueing_mode, **target_kwargs):
            # a fresh target/schedule pair for every request
            target = ProcessTarget(**target_kwargs)
            schedule = ProcessSchedule()
            schedule.queueing_mode = queueing_mode
            schedule.target = target
            return schedule

        def launch(schedule):
            # create a process from the shared definition and schedule it
            new_pid = self.pd_cli.create_process(self.process_definition_id)
            self.pd_cli.schedule_process(self.process_definition_id,
                schedule, process_id=new_pid)
            return new_pid

        # request non-default engine

        queued_schedule = make_schedule(ProcessQueueingMode.ALWAYS,
            execution_engine_id="engine2")

        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
            queued_schedule, process_id=pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.WAITING)

        # request unknown engine, with NEVER queuing mode. The request
        # should be rejected.
        # verifies L4-CI-CEI-RQ52

        rejected_pid = launch(make_schedule(ProcessQueueingMode.NEVER,
            execution_engine_id="not-a-real-ee"))

        self.waiter.await_state_event(rejected_pid, ProcessStateEnum.REJECTED)

        # now add a node and eeagent for engine2. original process should leave
        # queue and start running
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine2", node2_id)
        self._start_eeagent(node2_id)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # spawn another process. it should start immediately.

        pid2 = launch(make_schedule(ProcessQueueingMode.NEVER,
            execution_engine_id="engine2"))

        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # one more with node exclusive

        pid3 = launch(make_schedule(ProcessQueueingMode.NEVER,
            execution_engine_id="engine2", node_exclusive="hats"))

        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # kill the processes for good
        for running_pid in (pid, pid2, pid3):
            self.pd_cli.cancel_process(running_pid)
            self.waiter.await_state_event(running_pid, ProcessStateEnum.TERMINATED)

    def test_node_exclusive(self):
        """Exercise the node_exclusive scheduling constraint on "engine1"."""

        # the node_exclusive constraint is used to ensure multiple processes
        # of the same "kind" each get a VM exclusive of each other. Other
        # processes may run on these VMs, just not processes with the same
        # node_exclusive tag. Since we cannot directly query the contents
        # of each node in this test, we prove the capability by scheduling
        # processes one by one and checking their state.

        # verifies L4-CI-CEI-RQ121
        # verifies L4-CI-CEI-RQ57

        definition_id = self.process_definition_id

        # one target/schedule pair is shared by all three requests; the
        # target's node_exclusive tag is changed in place further down
        target = ProcessTarget(execution_engine_id="engine1")
        target.node_exclusive = "abc"
        schedule = ProcessSchedule()
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        schedule.target = target

        # first off, setUp() created a single node and eeagent.
        # Two processes are scheduled with the same "abc" node_exclusive
        # tag. Since there is only one node, the first process should run
        # and the second should be queued.

        pid1 = self.pd_cli.create_process(definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(definition_id, schedule,
            process_id=pid1)

        self.waiter.await_state_event(pid1, ProcessStateEnum.RUNNING)

        pid2 = self.pd_cli.create_process(definition_id)
        self.pd_cli.schedule_process(definition_id, schedule,
            process_id=pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.WAITING)

        # now demonstrate that the node itself is not full by launching
        # a third process without a node_exclusive tag -- it should start
        # immediately

        target.node_exclusive = None
        pid3 = self.pd_cli.create_process(definition_id)
        self.pd_cli.schedule_process(definition_id, schedule,
            process_id=pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # finally, add a second node to the engine. pid2 should be started
        # since there is an exclusive "abc" node free.
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine1", node2_id)
        self._start_eeagent(node2_id)
        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # kill the processes for good
        for running_pid in (pid1, pid2, pid3):
            self.pd_cli.cancel_process(running_pid)
            self.waiter.await_state_event(running_pid, ProcessStateEnum.TERMINATED)

    def test_code_download(self):
        """Check that a process definition carrying a code URL can be run.

        Two definitions name the same module and class
        ('ion.my.test.process' / 'TestProcess'); only one of them also has
        a 'url' entry. The URL-less process is expected to reach FAILED and
        the one with a URL is expected to reach RUNNING.
        """
        shared_executable = {'module': 'ion.my.test.process',
                             'class': 'TestProcess'}

        # definition without any URL; only module and class.
        plain_definition = ProcessDefinition(name='test_process_nodownload')
        plain_definition.executable = dict(shared_executable)
        plain_definition_id = self.pd_cli.create_process_definition(plain_definition)

        # definition whose URL points at test_process_dispatcher.py in the
        # directory of this file
        # verifies L4-CI-CEI-RQ114
        this_dir = os.path.dirname(__file__)
        url = "file://%s" % os.path.join(this_dir, 'test_process_dispatcher.py')
        url_definition = ProcessDefinition(name='test_process_download')
        url_definition.executable = dict(shared_executable, url=url)
        url_definition_id = self.pd_cli.create_process_definition(url_definition)

        schedule = ProcessSchedule()
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        schedule.target = ProcessTarget()

        self.waiter.start()

        # (definition id, state the launched process must reach), in
        # launch order: first the module with no download, then the one
        # with a URL
        cases = (
            (plain_definition_id, ProcessStateEnum.FAILED),
            (url_definition_id, ProcessStateEnum.RUNNING),
        )
        for definition_id, expected_state in cases:
            pid = self.pd_cli.create_process(definition_id)
            self.pd_cli.schedule_process(definition_id, schedule,
                process_id=pid)
            self.waiter.await_state_event(pid, expected_state)

    def _add_test_process(self, restart_mode=None):
        """Create and schedule one test process with its own listen name.

        restart_mode, when not None, is set on the schedule. Returns a
        (pid, client) tuple where client is a TestClient addressed to the
        listen name passed in the process configuration.
        """
        schedule = ProcessSchedule()
        if restart_mode is not None:
            schedule.restart_mode = restart_mode
        pid = self.pd_cli.create_process(self.process_definition_id)

        listen_name = "PDtestproc_%s" % uuid.uuid4().hex
        self.pd_cli.schedule_process(self.process_definition_id, schedule,
            process_id=pid,
            configuration={'process': {'listen_name': listen_name}})

        return pid, TestClient(to_name=listen_name)

    def test_restart(self):
        """Kill the initial eeagent together with its processes, restart
        the agent, and check which processes come back depending on their
        restart mode.
        """
        self.waiter.start()

        restartable_pids = []
        nonrestartable_pids = []
        clients = {}

        # start 10 processes per restart mode, in this order; ALWAYS and
        # ABNORMAL go to the restartable list, NEVER to the other one
        batches = (
            (ProcessRestartMode.ALWAYS, restartable_pids),
            (ProcessRestartMode.ABNORMAL, restartable_pids),
            (ProcessRestartMode.NEVER, nonrestartable_pids),
        )
        for restart_mode, bucket in batches:
            for _ in range(10):
                pid, client = self._add_test_process(restart_mode)
                bucket.append(pid)
                clients[pid] = client

        all_pids = restartable_pids + nonrestartable_pids

        self.waiter.await_many_state_events(all_pids, ProcessStateEnum.RUNNING)

        # freshly started: nothing reports being a restart yet
        for pid in all_pids:
            self.assertFalse(clients[pid].is_restart())
            self.assertEqual(clients[pid].count(), 1)

        # now kill the whole eeagent and restart it. processes should
        # show up as FAILED in the next heartbeat.
        initial_eea = self._initial_eea_pid
        resource_id = self._eea_pid_to_resource_id[initial_eea]
        persistence_dir = self._eea_pid_to_persistence_dir[initial_eea]
        log.debug("Restarting eeagent %s", self._initial_eea_pid)
        self._kill_eeagent(self._initial_eea_pid)

        # manually kill the processes to simulate a real container failure
        for pid in all_pids:
            self.container.terminate_process(pid)

        # same node, resource id and persistence dir as the killed agent
        self._start_eeagent(self.node1_id, resource_id=resource_id,
            persistence_dir=persistence_dir)

        # wait for restartables to restart
        self.waiter.await_many_state_events(restartable_pids, ProcessStateEnum.RUNNING)

        # query the processes again. it should have restart mode config
        for pid in restartable_pids:
            self.assertTrue(clients[pid].is_restart())
            self.assertEqual(clients[pid].count(), 1)

        # meanwhile some procs should not have restarted
        for pid in nonrestartable_pids:
            failed_proc = self.pd_cli.read_process(pid)
            self.assertEqual(failed_proc.process_state, ProcessStateEnum.FAILED)

        # guard against extraneous events we were receiving as part of a bug:
        # processes restarting again after they were already restarted
        self.waiter.await_nothing(timeout=5)

    def test_idempotency(self):
        """Ensure every operation can be safely retried: scheduling and
        cancelling are each issued twice for the same process.
        """
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        proc_name = 'myreallygoodname'
        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start(pid)

        def schedule():
            # the same request every time: same pid, name and empty config
            return self.pd_cli.schedule_process(self.process_definition_id,
                process_schedule, configuration={}, process_id=pid,
                name=proc_name)

        def check_process(expected_state, check_configuration=True):
            # read the process back and compare id, config and state
            proc = self.pd_cli.read_process(pid)
            self.assertEqual(proc.process_id, pid)
            if check_configuration:
                self.assertEqual(proc.process_configuration, {})
            self.assertEqual(proc.process_state, expected_state)

        # note: if we import UNSCHEDULED state into ProcessStateEnum,
        # this assertion will need to change.
        check_process(ProcessStateEnum.REQUESTED, check_configuration=False)

        self.assertEqual(pid, schedule())

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # repeating schedule is harmless
        self.assertEqual(pid, schedule())

        check_process(ProcessStateEnum.RUNNING)

        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)

        # repeating cancel is harmless
        self.pd_cli.cancel_process(pid)
        check_process(ProcessStateEnum.TERMINATED)
Beispiel #6
0
class ExecutionEngineAgentPyonIntTest(IonIntegrationTestCase):
    from ion.agents.cei.execution_engine_agent import ExecutionEngineAgentClient

    @needs_eeagent
    def setUp(self):
        """Start a container with the r2cei deployment, build the eeagent
        configuration and start the eeagent.

        Decorated with needs_eeagent like every test in this class, because
        the agent is already started here, before any test body runs.
        NOTE(review): presumably needs_eeagent skips when the eeagent
        dependency is unavailable -- confirm against its definition.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')

        self.resource_id = "eeagent_123456789"
        self._eea_name = "eeagent"

        # the eeagent's pyon launch type is pointed at this directory
        self.persistence_directory = tempfile.mkdtemp()

        self.agent_config = {
            'eeagent': {
                'heartbeat': 0,
                'slots': 100,
                'name': 'pyon_eeagent',
                'launch_type': {
                    'name': 'pyon',
                    'persistence_directory': self.persistence_directory,
                },
            },
            'agent': {'resource_id': self.resource_id},
            'logging': {
                'loggers': {
                    'eeagent': {
                        'level': 'DEBUG',
                        'handlers': ['console']
                    }
                },
                'root': {
                    'handlers': ['console']
                },
            }
        }

        self._start_eeagent()

    def _start_eeagent(self):
        """Spawn the eeagent in the container and build clients for it.

        Sets self.container_client, self.container, self._eea_pid,
        self._eea_pyon_client and self.eea_client.
        """
        self.container_client = ContainerAgentClient(node=self.container.node,
            name=self.container.name)
        self.container = self.container_client._get_container_instance()

        # Start eeagent.
        self._eea_pid = self.container_client.spawn_process(name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent", config=self.agent_config)
        log.info('Agent pid=%s.', str(self._eea_pid))

        # Start a resource agent client addressed by the agent's resource id.
        self._eea_pyon_client = ResourceAgentClient(self.resource_id, process=FakeProcess())
        log.info('Got eea client %s.', str(self._eea_pyon_client))

        # ExecutionEngineAgentClient is imported in the class body, which
        # binds it as a class attribute; a bare name inside a method would
        # only resolve through a module-level import, so go through self.
        self.eea_client = self.ExecutionEngineAgentClient(self._eea_pyon_client)

    def tearDown(self):
        """Terminate the eeagent process and remove its persistence
        directory.

        The directory is removed even when terminating the agent raises
        (for instance after a test that already killed it), so a failing
        test does not leave temporary directories behind.
        """
        try:
            self.container.terminate_process(self._eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    @needs_eeagent
    def test_basics(self):
        """Launch one pyon process through the eeagent, check that it is
        reported as RUNNING, then ask the agent to terminate it.
        """
        upid, launch_round = "test0", 0
        parameters = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }
        self.eea_client.launch_process(upid, launch_round, "pyon", parameters)

        launched = get_proc_for_upid(self.eea_client.dump_state().result, upid)
        self.assertIsNotNone(launched, "There is no state retrieved from eeagent")
        self.assertEqual(launched.get('state'), [500, 'RUNNING'])

        self.eea_client.terminate_process(upid, launch_round)
        # the state is fetched once more after termination; nothing is
        # asserted on it
        get_proc_for_upid(self.eea_client.dump_state().result, upid)

    @needs_eeagent
    def test_kill_and_revive(self):
        """test_kill_and_revive
        Ensure that when an eeagent dies, it pulls the processes it owned from
        persistence, and marks them as failed, so the PD can figure out what to
        do with them
        """
        upid, launch_round = "test0", 0
        proc_name = 'test_transform'
        parameters = {
            'name': proc_name,
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }
        self.eea_client.launch_process(upid, launch_round, "pyon", parameters)

        before = get_proc_for_upid(self.eea_client.dump_state().result, upid)
        self.assertIsNotNone(before, "There is no state retrieved from eeagent")
        self.assertEqual(before.get('state'), [500, 'RUNNING'])

        # Kill and restart eeagent. Also, kill proc started by eea to simulate
        # a killed container
        previous_eea_pid = str(self._eea_pid)
        self.container.terminate_process(self._eea_pid)
        launched_proc = self.container.proc_manager.procs_by_name.get(proc_name)
        self.assertIsNotNone(launched_proc)
        self.container.terminate_process(launched_proc.id)

        self._start_eeagent()

        # the restarted agent must have a different pid
        self.assertNotEqual(previous_eea_pid, self._eea_pid)

        after = get_proc_for_upid(self.eea_client.dump_state().result, upid)
        self.assertIsNotNone(after, "There is no state retrieved from eeagent")
        self.assertEqual(after.get('state'), [850, 'FAILED'])
Beispiel #7
0
class HeartbeaterIntTest(IonIntegrationTestCase):

    @needs_eeagent
    def setUp(self):
        """Start a container and prepare the eeagent configuration.

        The agent itself is not started here; test_heartbeater does that
        through _start_eeagent().
        """
        self._start_container()

        self.resource_id = "eeagent_123456789"
        self._eea_name = "eeagent"

        self.persistence_directory = tempfile.mkdtemp()

        launch_type = {
            'name': 'pyon',
            'persistence_directory': self.persistence_directory,
        }
        eeagent_section = {
            'heartbeat': "0.01",
            'slots': 100,
            'name': 'pyon_eeagent',
            'launch_type': launch_type,
        }
        logging_section = {
            'loggers': {'eeagent': {'level': 'DEBUG', 'handlers': ['console']}},
            'root': {'handlers': ['console']},
        }
        self.agent_config = {
            'eeagent': eeagent_section,
            'agent': {'resource_id': self.resource_id},
            'logging': logging_section,
        }

    def _start_eeagent(self):
        """Spawn the eeagent in the container and build clients for it.

        Sets self.container_client, self.container, self._eea_pid,
        self._eea_pyon_client and self.eea_client.
        """
        current = self.container
        self.container_client = ContainerAgentClient(
            node=current.node, name=current.name)
        self.container = self.container_client._get_container_instance()

        spawn_kwargs = {
            'name': self._eea_name,
            'module': "ion.agents.cei.execution_engine_agent",
            'cls': "ExecutionEngineAgent",
            'config': self.agent_config,
        }
        self._eea_pid = self.container_client.spawn_process(**spawn_kwargs)
        log.info('Agent pid=%s.', str(self._eea_pid))

        # resource agent client addressed by the configured resource id
        pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess())
        self._eea_pyon_client = pyon_client
        log.info('Got eea client %s.', str(pyon_client))

        self.eea_client = ExecutionEngineAgentClient(pyon_client)

    def tearDown(self):
        """Terminate the eeagent, if one was started, and remove the
        persistence directory.

        self._eea_pid is only assigned by _start_eeagent(), which the test
        body calls rather than setUp(); when the test fails before the
        agent is spawned there is nothing to terminate. The directory is
        removed even if terminating the agent raises.
        """
        try:
            eea_pid = getattr(self, '_eea_pid', None)
            if eea_pid is not None:
                self.container.terminate_process(eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    @needs_eeagent
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_heartbeater(self):
        """test_heartbeater

        Test whether the eeagent waits until the eeagent listener is ready before sending
        a heartbeat to the PD
        """

        # beat_died is a one-element list so the nested callback can flip
        # the flag without rebinding a name of the enclosing function
        # (python 2.7 has no nonlocal).
        # See: http://stackoverflow.com/questions/8934772/local-var-referenced-before-assignment
        beat_died = [False]

        def heartbeat_callback(heartbeat, headers):
            # on every heartbeat, call back into the agent that sent it
            eeagent_id = heartbeat['eeagent_id']
            agent_client = SimpleResourceAgentClient(eeagent_id, name=eeagent_id, process=FakeProcess())
            ee_client = ExecutionEngineAgentClient(agent_client, timeout=2)

            try:
                ee_client.dump_state()
            except:
                # kept broad so that any failure of the call is recorded.
                # NOTE(review): gevent timeouts may derive from
                # BaseException -- confirm before narrowing this clause.
                log.exception("Heartbeat Failed!")
                beat_died[0] = True

        self.beat_subscriber = HeartbeatSubscriber("heartbeat_queue",
            callback=heartbeat_callback, node=self.container.node)
        self.beat_subscriber.start()
        try:
            self._start_eeagent()
            # watch for 5 * 0.5 seconds; the flag is checked after each
            # sleep so a failure during the last interval is seen as well
            for _ in range(5):
                gevent.sleep(0.5)
                if beat_died[0]:
                    # self.fail() instead of a bare assert, which is
                    # stripped when python runs with -O
                    self.fail("A Hearbeat callback wasn't able to contact the eeagent")
        finally:
            self.beat_subscriber.stop()
class HeartbeaterIntTest(IonIntegrationTestCase):

    @needs_eeagent
    def setUp(self):
        """Start a container and prepare the eeagent configuration.

        The agent itself is not started here; test_heartbeater does that
        through _start_eeagent().
        """
        self._start_container()

        self.resource_id = "eeagent_123456789"
        self._eea_name = "eeagent"

        self.persistence_directory = tempfile.mkdtemp()

        pyon_launch = {
            'name': 'pyon',
            'persistence_directory': self.persistence_directory,
        }
        eeagent_cfg = {
            'heartbeat': 300,
            'slots': 100,
            'name': 'pyon_eeagent',
            'launch_type': pyon_launch,
        }
        logging_cfg = {
            'loggers': {'eeagent': {'level': 'DEBUG', 'handlers': ['console']}},
            'root': {'handlers': ['console']},
        }
        self.agent_config = {
            'eeagent': eeagent_cfg,
            'agent': {'resource_id': self.resource_id},
            'logging': logging_cfg,
        }

    def _start_eeagent(self):
        """Spawn the eeagent in the container and build clients for it.

        Sets self.container_client, self.container, self._eea_pid,
        self._eea_pyon_client and self.eea_client.
        """
        running_container = self.container
        self.container_client = ContainerAgentClient(
            node=running_container.node, name=running_container.name)
        self.container = self.container_client._get_container_instance()

        agent_spec = dict(
            name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=self.agent_config,
        )
        self._eea_pid = self.container_client.spawn_process(**agent_spec)
        log.info('Agent pid=%s.', str(self._eea_pid))

        # resource agent client addressed by the configured resource id
        resource_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess())
        self._eea_pyon_client = resource_client
        log.info('Got eea client %s.', str(resource_client))

        self.eea_client = ExecutionEngineAgentClient(resource_client)

    def tearDown(self):
        """Terminate the eeagent, if one was started, and remove the
        persistence directory.

        self._eea_pid is only assigned by _start_eeagent(), which the test
        body calls rather than setUp(); when the test fails before the
        agent is spawned there is nothing to terminate. The directory is
        removed even if terminating the agent raises.
        """
        try:
            eea_pid = getattr(self, '_eea_pid', None)
            if eea_pid is not None:
                self.container.terminate_process(eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    @needs_eeagent
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_heartbeater(self):
        """test_heartbeater

        Test whether the eeagent waits until the eeagent listener is ready before sending
        a heartbeat to the PD
        """

        beat_died = threading.Event()
        beat_succeeded = threading.Event()

        def heartbeat_callback(heartbeat, headers):
            # on every heartbeat, call back into the agent that sent it
            eeagent_id = heartbeat['eeagent_id']
            agent_client = SimpleResourceAgentClient(eeagent_id, name=eeagent_id, process=FakeProcess())
            ee_client = ExecutionEngineAgentClient(agent_client, timeout=10)

            try:
                ee_client.dump_state()
                beat_succeeded.set()
            except:
                # kept broad so that any failure of the call is recorded.
                # NOTE(review): gevent timeouts may derive from
                # BaseException -- confirm before narrowing this clause.
                log.exception("Heartbeat Failed!")
                beat_died.set()

        self.beat_subscriber = HeartbeatSubscriber("heartbeat_queue",
            callback=heartbeat_callback, node=self.container.node)
        self.beat_subscriber.start()
        try:
            self._start_eeagent()
            # Wait up to 20s for a successful callback. The flags are read
            # with is_set() rather than from wait()'s return value, which
            # is None on some interpreter versions.
            beat_succeeded.wait(20)
            if not beat_succeeded.is_set():
                # no success yet: give a failure up to 20s more to show up
                beat_died.wait(20)
                # NOTE(review): when no heartbeat arrives at all, neither
                # flag is set and the test passes -- confirm that is
                # intended.
                self.assertFalse(beat_died.is_set(),
                    "A Hearbeat callback wasn't able to contact the eeagent")
        finally:
            # always stop the subscriber so it does not outlive the test
            self.beat_subscriber.stop()
Beispiel #9
0
class ExecutionEngineAgentPyonIntTest(IonIntegrationTestCase):

    _webserver = None

    @needs_eeagent
    def setUp(self):
        """Start a container, load res/deploy/r2cei.yml and spawn an eeagent.

        Builds ``self.agent_config`` (heartbeat 1, 100 slots, "pyon" launch
        type persisting into a fresh temporary directory) and then starts the
        agent with it.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')

        self._eea_name = "eeagent"
        self.resource_id = "eeagent_123456789"
        self.persistence_directory = tempfile.mkdtemp()

        # Assemble the configuration section by section.
        launch_type = {
            'name': 'pyon',
            'persistence_directory': self.persistence_directory,
        }
        eeagent_section = {
            'heartbeat': 1,
            'slots': 100,
            'name': 'pyon_eeagent',
            'launch_type': launch_type,
        }
        logging_section = {
            'loggers': {
                'eeagent': {'level': 'DEBUG', 'handlers': ['console']},
            },
            'root': {'handlers': ['console']},
        }
        self.agent_config = {
            'eeagent': eeagent_section,
            'agent': {'resource_id': self.resource_id},
            'logging': logging_section,
        }

        self._start_eeagent()

    def _start_eeagent(self):
        """Spawn an ExecutionEngineAgent process and build clients for it.

        Sets ``container_client``, ``container``, ``_eea_pid``,
        ``_eea_pyon_client`` and ``eea_client`` on the test instance.
        """
        agent_container = ContainerAgentClient(
            node=self.container.node, name=self.container.name)
        self.container_client = agent_container
        self.container = agent_container._get_container_instance()

        # Spawn the agent using the current self.agent_config.
        self._eea_pid = agent_container.spawn_process(
            name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=self.agent_config)
        log.info('Agent pid=%s.', str(self._eea_pid))

        # Resource agent client addressed by resource_id, then wrapped in the
        # eeagent-specific client used by the tests.
        pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess())
        self._eea_pyon_client = pyon_client
        log.info('Got eea client %s.', str(pyon_client))

        self.eea_client = ExecutionEngineAgentClient(pyon_client)

    def tearDown(self):
        """Stop the test webserver, terminate the eeagent and remove its
        persistence directory.

        Each cleanup step runs even when an earlier one raises, so a failing
        step does not leave the agent process or the temporary directory
        behind.
        """
        try:
            try:
                self._stop_webserver()
            finally:
                self.container.terminate_process(self._eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    def _start_webserver(self, directory_to_serve, port=None):
        """ Start a webserver for testing code download
        Note: tries really hard to get a port, and if it can't use
        the suggested port, randomly picks another, and returns it

        Up to 100 bind attempts are made on localhost: the suggested port
        first (8008 when none is given), then random ports between 8000 and
        10000. The server is stored in ``self._webserver`` and served from a
        gevent greenlet stored in ``self._web_glet``. The test fails if no
        port could be bound.

        directory_to_serve is accepted but not used in this method; the
        handler resolves request paths against the current working directory.
        """
        class QuietHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
            # Subclass instead of patching SimpleHTTPRequestHandler itself, so
            # the logging override does not leak into other users of the
            # stdlib class.

            def log_message(self, format, *args):
                # swallow log messages
                pass

        class Server(HTTPServer):

            # Number of requests handled; read by the tests.
            requests = 0

            def serve_forever(self):
                self._serving = 1
                while self._serving:
                    self.handle_request()
                    self.requests += 1

            def stop(self):
                self._serving = 0

        if port is None:
            port = 8008

        for _attempt in range(0, 100):
            try:
                self._webserver = Server(("localhost", port), QuietHandler)
            except socket.error:
                print("port %s is in use, picking another" % port)
                port = randint(8000, 10000)
            else:
                break
        else:
            # Every attempt failed: report that, rather than crashing below on
            # a missing server object.
            self.fail("Unable to bind a port for the test webserver")

        self._web_glet = gevent.spawn(self._webserver.serve_forever)
        return port

    def _stop_webserver(self):
        """Stop the test webserver, if one was started.

        Kills the serving greenlet and closes the listening socket so the port
        is released, then clears ``self._webserver``.
        """
        if self._webserver is None:
            return
        self._web_glet.kill()
        # Killing the greenlet alone leaves the socket bound to the port.
        self._webserver.server_close()
        self._webserver = None

    def _enable_code_download(self, whitelist=None):
        """Restart the eeagent with code download enabled.

        ``whitelist`` is stored under the agent's ``code_download`` config;
        it defaults to an empty list.
        """
        code_download = {
            'enabled': True,
            'whitelist': [] if whitelist is None else whitelist,
        }

        # The running agent is replaced by one started with the new config.
        self.container.terminate_process(self._eea_pid)
        self.agent_config['eeagent']['code_download'] = code_download
        self._start_eeagent()

    def wait_for_state(self, upid, desired_state, timeout=30):
        """Poll the eeagent until process ``upid`` reports ``desired_state``.

        At most ``timeout`` attempts are made, with a one second sleep after
        each poll that did not match. A dump_state call that times out counts
        as an attempt, so an unresponsive eeagent cannot keep this method
        looping forever.

        Raises AssertionError when the state is not reached in time.
        """
        attempts = 0
        last_state = None
        while timeout > attempts:
            try:
                state = self.eea_client.dump_state().result
            except Timeout:
                log.warn("Timeout calling EEAgent dump_state. retrying.")
                # Count the failed call; no extra sleep, since the call itself
                # already waited for its own timeout.
                attempts += 1
                continue
            proc = get_proc_for_upid(state, upid)
            last_state = proc.get('state')
            if last_state == desired_state:
                return
            gevent.sleep(1)
            attempts += 1

        assert False, "Process %s took too long to get to %s, had %s" % (upid, desired_state, last_state)

    @needs_eeagent
    def test_basics(self):
        """Launch, terminate and clean up one process, checking the number of
        processes the eeagent reports after each step.
        """
        upid = "test0"
        round_num = 0
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }

        def process_count():
            return len(self.eea_client.dump_state().result['processes'])

        self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
        self.wait_for_state(upid, [500, 'RUNNING'])
        assert process_count() == 1

        # A terminated process stays listed until it is cleaned up.
        self.eea_client.terminate_process(upid, round_num)
        self.wait_for_state(upid, [700, 'TERMINATED'])
        assert process_count() == 1

        self.eea_client.cleanup_process(upid, round_num)
        assert process_count() == 0

    @needs_eeagent
    def test_duplicate(self):
        """Launch the same upid and round twice and check that the eeagent
        still reports a single process.
        """
        upid = "test0"
        round_num = 0
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }

        def process_count():
            return len(self.eea_client.dump_state().result['processes'])

        # Two identical launch requests, each followed by a wait for RUNNING.
        for _ in (1, 2):
            self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
            self.wait_for_state(upid, [500, 'RUNNING'])

        assert process_count() == 1

        self.eea_client.terminate_process(upid, round_num)
        self.wait_for_state(upid, [700, 'TERMINATED'])
        assert process_count() == 1

        self.eea_client.cleanup_process(upid, round_num)
        assert process_count() == 0

    @needs_eeagent
    def test_restart(self):
        """Relaunch a running process with an incremented round and check that
        the eeagent reports the new round.
        """
        upid = "test0"
        round_num = 0
        run_type = "pyon"
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }

        def dump():
            return self.eea_client.dump_state().result

        self.eea_client.launch_process(upid, round_num, run_type, launch_params)
        self.wait_for_state(upid, [500, 'RUNNING'])
        assert len(dump()['processes']) == 1

        # Start again with incremented round. eeagent should restart the process
        round_num += 1
        self.eea_client.launch_process(upid, round_num, run_type, launch_params)
        self.wait_for_state(upid, [500, 'RUNNING'])

        reported_round = dump()['processes'][0]['round']
        assert round_num == int(reported_round)

        # TODO: this test is disabled, as the restart op is disabled
        # Run restart with incremented round. eeagent should restart the process
        #round += 1

        #self.eea_client.restart_process(u_pid, round)
        #self.wait_for_state(u_pid, [500, 'RUNNING'])

        #state = self.eea_client.dump_state().result
        #ee_round = state['processes'][0]['round']
        #assert round == int(ee_round)

        self.eea_client.terminate_process(upid, round_num)
        self.wait_for_state(upid, [700, 'TERMINATED'])
        assert len(dump()['processes']) == 1

        self.eea_client.cleanup_process(upid, round_num)
        assert len(dump()['processes']) == 0

    @needs_eeagent
    def test_failing_process(self):
        """Launch TestProcessFail, wait for it to be reported FAILED, then
        terminate it and look it up in the dumped state.
        """
        upid = "testfail"
        round_num = 0
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcessFail',
        }

        self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
        self.wait_for_state(upid, [850, 'FAILED'])

        self.eea_client.terminate_process(upid, round_num)
        dumped = self.eea_client.dump_state().result
        get_proc_for_upid(dumped, upid)

    @needs_eeagent
    def test_slow_to_start(self):
        """Launch ten TestProcessSlowStart processes and wait, up to 60
        attempts each, for every one of them to be RUNNING.
        """
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcessSlowStart',
        }
        round_num = 0
        upids = [str(uuid.uuid4().hex) for _ in range(0, 10)]

        # Fire off every launch first, then wait on each in turn.
        for upid in upids:
            self.eea_client.launch_process(upid, round_num, "pyon", launch_params)

        for upid in upids:
            self.wait_for_state(upid, [500, 'RUNNING'], timeout=60)

    @needs_eeagent
    def test_start_cancel(self):
        """Terminate a TestProcessSlowStart process while it is still PENDING
        and check that it ends up TERMINATED.
        """
        round_num = 0
        upid = str(uuid.uuid4().hex)
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcessSlowStart',
        }

        self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
        self.wait_for_state(upid, [400, 'PENDING'])

        self.eea_client.terminate_process(upid, round_num)
        self.wait_for_state(upid, [700, 'TERMINATED'])

    @needs_eeagent
    def test_kill_and_revive(self):
        """test_kill_and_revive
        Ensure that when an eeagent dies, it pulls the processes it owned from
        persistence, and marks them as failed, so the PD can figure out what to
        do with them
        """
        upid = "test0"
        round_num = 0
        process_name = 'test_transform'
        launch_params = {
            'name': process_name,
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }
        self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
        self.wait_for_state(upid, [500, 'RUNNING'])

        # Simulate a killed container: take down the eeagent and the process
        # it launched, then bring up a new eeagent.
        previous_eea_pid = str(self._eea_pid)
        self.container.terminate_process(self._eea_pid)
        launched_proc = self.container.proc_manager.procs_by_name.get(process_name)
        self.assertIsNotNone(launched_proc)
        self.container.terminate_process(launched_proc.id)

        self._start_eeagent()
        self.assertNotEqual(previous_eea_pid, self._eea_pid)

        # The new agent is expected to report the old process as failed.
        self.wait_for_state(upid, [850, 'FAILED'])

    @needs_eeagent
    def test_run_out_of_slots(self):
        """test_run_out_of_slots

        With a single slot configured, a second launch is expected to be
        REJECTED; after the eeagent is restarted the first process is expected
        to be FAILED and the second still REJECTED.
        """
        def restart_with_one_slot():
            # Replace the running eeagent by one configured with one slot and
            # check that a different process id was handed out.
            previous_eea_pid = str(self._eea_pid)
            self.container.terminate_process(self._eea_pid)
            self.agent_config['eeagent']['slots'] = 1
            self._start_eeagent()
            self.assertNotEqual(previous_eea_pid, self._eea_pid)

        restart_with_one_slot()

        first_upid, second_upid = "test0", "test1"
        round_num = 0
        run_type = "pyon"
        launch_params = {
            'name': 'test_transform',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }

        self.eea_client.launch_process(first_upid, round_num, run_type, launch_params)
        self.wait_for_state(first_upid, [500, 'RUNNING'])

        self.eea_client.launch_process(second_upid, round_num, run_type, launch_params)
        self.wait_for_state(second_upid, [900, 'REJECTED'])

        restart_with_one_slot()

        self.wait_for_state(first_upid, [850, 'FAILED'])
        self.wait_for_state(second_upid, [900, 'REJECTED'])

    @needs_eeagent
    def test_download_code(self):
        """Launch a process from a module given by file:// URI.

        A URI that cannot be downloaded is expected to give a 404 response;
        the valid URI, launched with the next round, is expected to reach
        RUNNING.
        """
        self._enable_code_download(whitelist=['*'])

        upid = "test0"
        round_num = 0
        run_type = "pyon"
        good_uri = 'file://%s/downloads/module_to_download.py' % get_this_directory()
        missing_uri = 'file:///tmp/notreal/module_to_download.py'

        def params_for(uri):
            return {
                'name': 'test_transform',
                'module': "ion.my.module.to.download",
                'module_uri': uri,
                'cls': 'TestDownloadProcess',
            }

        response = self.eea_client.launch_process(
            upid, round_num, run_type, params_for(missing_uri))

        print(response)
        assert response.status == 404
        assert "Unable to download" in response.result

        round_num += 1
        self.eea_client.launch_process(upid, round_num, run_type, params_for(good_uri))
        self.wait_for_state(upid, [500, 'RUNNING'])

        self.eea_client.terminate_process(upid, round_num)
        dumped = self.eea_client.dump_state().result
        get_proc_for_upid(dumped, upid)

    @needs_eeagent
    def test_whitelist(self):
        """Check the code_download whitelist handling of the eeagent.

        The same http launch request is issued with code download disabled,
        with an empty whitelist, with a non-matching entry, with an exact
        host match and with a wildcard; the round is incremented before every
        request after the first.
        """
        downloads_directory = os.path.join(get_this_directory(), "downloads")
        http_port = self._start_webserver(downloads_directory, port=8910)

        while self._webserver is None:
            print("Waiting for webserver to come up")
            gevent.sleep(1)

        assert self._webserver.requests == 0

        upid = "test0"
        run_type = "pyon"
        launch_params = {
            'name': 'test_transform',
            'module': "ion.my.module",
            'module_uri': "http://localhost:%s/ion/agents/cei/test/downloads/module_to_download.py" % http_port,
            'cls': 'TestDownloadProcess',
        }

        def launch(round_num):
            return self.eea_client.launch_process(upid, round_num, run_type, launch_params)

        def run_then_terminate(round_num):
            launch(round_num)
            self.wait_for_state(upid, [500, 'RUNNING'])
            self.eea_client.terminate_process(upid, round_num)
            dumped = self.eea_client.dump_state().result
            get_proc_for_upid(dumped, upid)

        # Code download is not enabled yet
        round_num = 0
        response = launch(round_num)
        assert response.status == 401
        assert "Code download not enabled" in response.result

        # Test no whitelist
        self._enable_code_download()
        round_num += 1
        response = launch(round_num)
        print(response)
        assert response.status == 401
        assert "not in code_download whitelist" in response.result

        # Test not matching
        self._enable_code_download(whitelist=['blork'])
        round_num += 1
        response = launch(round_num)
        assert response.status == 401
        assert "not in code_download whitelist" in response.result

        # Test exact matching
        self._enable_code_download(whitelist=['localhost'])
        round_num += 1
        run_then_terminate(round_num)

        # Test wildcard
        self._enable_code_download(whitelist=['*'])
        round_num += 1
        run_then_terminate(round_num)

    @needs_eeagent
    def test_caching(self):
        """Check how often the eeagent hits the webserver for module code.

        The request counter of the test webserver is expected to be 1 after
        the first download launch and to stay at 1 for a repeated launch and
        for a module that is importable locally.
        """
        downloads_directory = os.path.join(get_this_directory(), "downloads")
        http_port = self._start_webserver(downloads_directory, port=8910)

        while self._webserver is None:
            print("Waiting for webserver to come up")
            gevent.sleep(1)

        self._enable_code_download(['*'])
        assert self._webserver.requests == 0

        round_num = 0
        run_type = "pyon"
        module_uri = "http://localhost:%s/ion/agents/cei/test/downloads/module_to_download.py" % http_port

        def params_for(name, module, cls):
            return {'name': name, 'module': module, 'module_uri': module_uri, 'cls': cls}

        def launch_then_terminate(upid, params, expected_state):
            self.eea_client.launch_process(upid, round_num, run_type, params)
            self.wait_for_state(upid, expected_state)
            self.eea_client.terminate_process(upid, round_num)
            dumped = self.eea_client.dump_state().result
            get_proc_for_upid(dumped, upid)

        download_params = params_for('test_transform', "ion.my.module", 'TestDownloadProcess')

        # Launch a process, check that webserver is hit
        launch_then_terminate("test0", download_params, [500, 'RUNNING'])
        assert self._webserver.requests == 1

        # Launch another process, check that webserver is still only hit once
        launch_then_terminate("test0", download_params, [500, 'RUNNING'])
        assert self._webserver.requests == 1

        # Test that a module that is already available in tarball won't trigger a download
        local_params = params_for(
            'test_transformx', "ion.agents.cei.test.test_eeagent", 'TestProcess')
        launch_then_terminate("test5", local_params, [500, 'RUNNING'])
        assert self._webserver.requests == 1

        # Test behaviour of a non existent class with no download
        missing_cls_params = params_for(
            'test_transformx', "ion.agents.cei.test.test_eeagent", 'TestProcessNotReal')
        launch_then_terminate("test9", missing_cls_params, [850, 'FAILED'])
Beispiel #10
0
class ExecutionEngineAgentPyonIntTest(IonIntegrationTestCase):

    _webserver = None

    @needs_eeagent
    def setUp(self):
        """Start a container, load res/deploy/r2cei.yml and spawn an eeagent.

        Builds ``self.agent_config`` (heartbeat 0, 100 slots, "pyon" launch
        type persisting into a fresh temporary directory) and then starts the
        agent with it.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')

        self._eea_name = "eeagent"
        self.resource_id = "eeagent_123456789"
        self.persistence_directory = tempfile.mkdtemp()

        # Assemble the configuration section by section.
        launch_type = {
            'name': 'pyon',
            'persistence_directory': self.persistence_directory,
        }
        eeagent_section = {
            'heartbeat': 0,
            'slots': 100,
            'name': 'pyon_eeagent',
            'launch_type': launch_type,
        }
        logging_section = {
            'loggers': {
                'eeagent': {'level': 'DEBUG', 'handlers': ['console']},
            },
            'root': {'handlers': ['console']},
        }
        self.agent_config = {
            'eeagent': eeagent_section,
            'agent': {'resource_id': self.resource_id},
            'logging': logging_section,
        }

        self._start_eeagent()

    def _start_eeagent(self):
        """Spawn an ExecutionEngineAgent process and build clients for it.

        Sets ``container_client``, ``container``, ``_eea_pid``,
        ``_eea_pyon_client`` and ``eea_client`` on the test instance.
        """
        agent_container = ContainerAgentClient(
            node=self.container.node, name=self.container.name)
        self.container_client = agent_container
        self.container = agent_container._get_container_instance()

        # Spawn the agent using the current self.agent_config.
        self._eea_pid = agent_container.spawn_process(
            name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=self.agent_config)
        log.info('Agent pid=%s.', str(self._eea_pid))

        # Resource agent client addressed by resource_id, then wrapped in the
        # eeagent-specific client used by the tests.
        pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess())
        self._eea_pyon_client = pyon_client
        log.info('Got eea client %s.', str(pyon_client))

        self.eea_client = ExecutionEngineAgentClient(pyon_client)

    def tearDown(self):
        """Stop the test webserver, terminate the eeagent and remove its
        persistence directory.

        Each cleanup step runs even when an earlier one raises, so a failing
        step does not leave the agent process or the temporary directory
        behind.
        """
        try:
            try:
                self._stop_webserver()
            finally:
                self.container.terminate_process(self._eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    def _start_webserver(self, directory_to_serve, port=None):
        """ Start a webserver for testing code download
        Note: tries really hard to get a port, and if it can't use
        the suggested port, randomly picks another, and returns it

        The working directory is changed to directory_to_serve (the previous
        one is kept in ``self.old_cwd``). Up to 100 bind attempts are made on
        localhost: the suggested port first (8008 when none is given), then
        random ports between 8000 and 10000. The server is stored in
        ``self._webserver`` and served from a gevent greenlet stored in
        ``self._web_glet``. If no port could be bound, the working directory
        is restored and the test fails.
        """
        class QuietHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
            # Subclass instead of patching SimpleHTTPRequestHandler itself, so
            # the logging override does not leak into other users of the
            # stdlib class.

            def log_message(self, format, *args):
                # swallow log messages
                pass

        class Server(HTTPServer):

            # Number of requests handled; read by the tests.
            requests = 0

            def serve_forever(self):
                self._serving = 1
                while self._serving:
                    self.handle_request()
                    self.requests += 1

            def stop(self):
                self._serving = 0

        if port is None:
            port = 8008
        self.old_cwd = os.getcwd()
        os.chdir(directory_to_serve)

        for _attempt in range(0, 100):
            try:
                self._webserver = Server(("localhost", port), QuietHandler)
            except socket.error:
                print("port %s is in use, picking another" % port)
                port = randint(8000, 10000)
            else:
                break
        else:
            # _stop_webserver only restores the cwd when a server exists, so
            # undo the chdir here before reporting the failure.
            os.chdir(self.old_cwd)
            self.fail("Unable to bind a port for the test webserver")

        self._web_glet = gevent.spawn(self._webserver.serve_forever)
        return port

    def _stop_webserver(self):
        """Stop the test webserver, if one was started.

        Kills the serving greenlet, closes the listening socket so the port is
        released, restores the working directory saved by _start_webserver and
        clears ``self._webserver``.
        """
        if self._webserver is None:
            return
        try:
            self._web_glet.kill()
            # Killing the greenlet alone leaves the socket bound to the port.
            self._webserver.server_close()
        finally:
            # Restore the cwd even if shutting the server down failed.
            os.chdir(self.old_cwd)
            self._webserver = None

    def _enable_code_download(self, whitelist=None):
        """Restart the eeagent with code download enabled.

        ``whitelist`` is stored under the agent's ``code_download`` config;
        it defaults to an empty list.
        """
        code_download = {
            'enabled': True,
            'whitelist': [] if whitelist is None else whitelist,
        }

        # The running agent is replaced by one started with the new config.
        self.container.terminate_process(self._eea_pid)
        self.agent_config['eeagent']['code_download'] = code_download
        self._start_eeagent()

    def wait_for_state(self, upid, desired_state, timeout=30):
        """Poll the eeagent until process ``upid`` reports ``desired_state``.

        At most ``timeout`` polls are made, with a one second sleep after each
        poll that did not match. Raises AssertionError when the state is not
        reached in time.
        """
        polls = 0
        observed = None
        while polls < timeout:
            snapshot = self.eea_client.dump_state().result
            observed = get_proc_for_upid(snapshot, upid).get('state')
            if observed == desired_state:
                return
            gevent.sleep(1)
            polls += 1

        assert False, "Process %s took too long to get to %s, had %s" % (upid, desired_state, observed)

    @needs_eeagent
    def test_basics(self):
        """Launch one TestProcess, wait for RUNNING, terminate it and wait
        for TERMINATED.
        """
        upid = "test0"
        round_num = 0
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }

        self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
        self.wait_for_state(upid, [500, 'RUNNING'])

        self.eea_client.terminate_process(upid, round_num)
        self.wait_for_state(upid, [700, 'TERMINATED'])

    @needs_eeagent
    def test_failing_process(self):
        """Launch TestProcessFail, wait for it to be reported FAILED, then
        terminate it and look it up in the dumped state.
        """
        upid = "testfail"
        round_num = 0
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcessFail',
        }

        self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
        self.wait_for_state(upid, [850, 'FAILED'])

        self.eea_client.terminate_process(upid, round_num)
        dumped = self.eea_client.dump_state().result
        get_proc_for_upid(dumped, upid)

    @needs_eeagent
    def test_slow_to_start(self):
        """Launch ten TestProcessSlowStart processes and wait, up to 60
        attempts each, for every one of them to be RUNNING.
        """
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcessSlowStart',
        }
        round_num = 0
        upids = [str(uuid.uuid4().hex) for _ in range(0, 10)]

        # Fire off every launch first, then wait on each in turn.
        for upid in upids:
            self.eea_client.launch_process(upid, round_num, "pyon", launch_params)

        for upid in upids:
            self.wait_for_state(upid, [500, 'RUNNING'], timeout=60)

    @needs_eeagent
    def test_start_cancel(self):
        """Terminate a TestProcessSlowStart process while it is still PENDING
        and check that it ends up TERMINATED.
        """
        round_num = 0
        upid = str(uuid.uuid4().hex)
        launch_params = {
            'name': 'test_x',
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcessSlowStart',
        }

        self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
        self.wait_for_state(upid, [400, 'PENDING'])

        self.eea_client.terminate_process(upid, round_num)
        self.wait_for_state(upid, [700, 'TERMINATED'])


    @needs_eeagent
    def test_kill_and_revive(self):
        """test_kill_and_revive
        Ensure that when an eeagent dies, it pulls the processes it owned from
        persistence, and marks them as failed, so the PD can figure out what to
        do with them
        """
        upid = "test0"
        round_num = 0
        process_name = 'test_transform'
        launch_params = {
            'name': process_name,
            'module': 'ion.agents.cei.test.test_eeagent',
            'cls': 'TestProcess',
        }
        self.eea_client.launch_process(upid, round_num, "pyon", launch_params)
        self.wait_for_state(upid, [500, 'RUNNING'])

        # Simulate a killed container: take down the eeagent and the process
        # it launched, then bring up a new eeagent.
        previous_eea_pid = str(self._eea_pid)
        self.container.terminate_process(self._eea_pid)
        launched_proc = self.container.proc_manager.procs_by_name.get(process_name)
        self.assertIsNotNone(launched_proc)
        self.container.terminate_process(launched_proc.id)

        self._start_eeagent()
        self.assertNotEqual(previous_eea_pid, self._eea_pid)

        # The new agent is expected to report the old process as failed.
        self.wait_for_state(upid, [850, 'FAILED'])

    @needs_eeagent
    def test_download_code(self):
        """Launch a process from a module given by file:// URI.

        A URI that cannot be downloaded is expected to give a 404 response;
        the valid URI, launched with the same round, is expected to reach
        RUNNING.
        """
        self._enable_code_download(whitelist=['*'])

        upid = "test0"
        round_num = 0
        run_type = "pyon"
        good_uri = 'file://%s/downloads/module_to_download.py' % get_this_directory()
        missing_uri = 'file:///tmp/notreal/module_to_download.py'

        def params_for(uri):
            return {
                'name': 'test_transform',
                'module': "ion.my.module.to.download",
                'module_uri': uri,
                'cls': 'TestDownloadProcess',
            }

        response = self.eea_client.launch_process(
            upid, round_num, run_type, params_for(missing_uri))

        print(response)
        assert response.status == 404
        assert "Unable to download" in response.result

        self.eea_client.launch_process(upid, round_num, run_type, params_for(good_uri))
        self.wait_for_state(upid, [500, 'RUNNING'])

        self.eea_client.terminate_process(upid, round_num)
        dumped = self.eea_client.dump_state().result
        get_proc_for_upid(dumped, upid)

    @needs_eeagent
    def test_whitelist(self):
        """Check the code_download whitelist handling of the eeagent.

        The same http launch request (same upid and round) is issued with
        code download disabled, with an empty whitelist, with a non-matching
        entry, with an exact host match and with a wildcard.
        """
        downloads_directory = os.path.join(get_this_directory(), "downloads")
        http_port = self._start_webserver(downloads_directory, port=8910)

        while self._webserver is None:
            print("Waiting for webserver to come up")
            gevent.sleep(1)

        assert self._webserver.requests == 0

        upid = "test0"
        round_num = 0
        run_type = "pyon"
        launch_params = {
            'name': 'test_transform',
            'module': "ion.my.module",
            'module_uri': "http://localhost:%s/module_to_download.py" % http_port,
            'cls': 'TestDownloadProcess',
        }

        def launch():
            return self.eea_client.launch_process(upid, round_num, run_type, launch_params)

        def run_then_terminate():
            launch()
            self.wait_for_state(upid, [500, 'RUNNING'])
            self.eea_client.terminate_process(upid, round_num)
            dumped = self.eea_client.dump_state().result
            get_proc_for_upid(dumped, upid)

        # Code download is not enabled yet
        response = launch()
        assert response.status == 401
        assert "Code download not enabled" in response.result

        # Test no whitelist
        self._enable_code_download()
        response = launch()
        print(response)
        assert response.status == 401
        assert "not in code_download whitelist" in response.result

        # Test not matching
        self._enable_code_download(whitelist=['blork'])
        response = launch()
        assert response.status == 401
        assert "not in code_download whitelist" in response.result

        # Test exact matching
        self._enable_code_download(whitelist=['localhost'])
        run_then_terminate()

        # Test wildcard
        self._enable_code_download(whitelist=['*'])
        run_then_terminate()

    @needs_eeagent
    def test_caching(self):
        """Check how often the eeagent hits the webserver for module code.

        The request counter of the test webserver is expected to be 1 after
        the first download launch and to stay at 1 for a repeated launch and
        for a module that is importable locally.
        """
        downloads_directory = os.path.join(get_this_directory(), "downloads")
        http_port = self._start_webserver(downloads_directory, port=8910)

        while self._webserver is None:
            print("Waiting for webserver to come up")
            gevent.sleep(1)

        self._enable_code_download(['*'])
        assert self._webserver.requests == 0

        round_num = 0
        run_type = "pyon"
        module_uri = "http://localhost:%s/module_to_download.py" % http_port

        def params_for(name, module, cls):
            return {'name': name, 'module': module, 'module_uri': module_uri, 'cls': cls}

        def launch_then_terminate(upid, params, expected_state):
            self.eea_client.launch_process(upid, round_num, run_type, params)
            self.wait_for_state(upid, expected_state)
            self.eea_client.terminate_process(upid, round_num)
            dumped = self.eea_client.dump_state().result
            get_proc_for_upid(dumped, upid)

        download_params = params_for('test_transform', "ion.my.module", 'TestDownloadProcess')

        # Launch a process, check that webserver is hit
        launch_then_terminate("test0", download_params, [500, 'RUNNING'])
        assert self._webserver.requests == 1

        # Launch another process, check that webserver is still only hit once
        launch_then_terminate("test0", download_params, [500, 'RUNNING'])
        assert self._webserver.requests == 1

        # Test that a module that is already available in tarball won't trigger a download
        local_params = params_for(
            'test_transformx', "ion.agents.cei.test.test_eeagent", 'TestProcess')
        launch_then_terminate("test5", local_params, [500, 'RUNNING'])
        assert self._webserver.requests == 1

        # Test behaviour of a non existent class with no download
        missing_cls_params = params_for(
            'test_transformx', "ion.agents.cei.test.test_eeagent", 'TestProcessNotReal')
        launch_then_terminate("test9", missing_cls_params, [850, 'FAILED'])
class ProcessDispatcherEEAgentIntTest(ProcessDispatcherServiceIntTest):
    """Run the basic int tests again, with a different environment.

    The tests here run against ExecutionEngineAgent processes spawned inside
    the test container. Nodes are announced to the Process Dispatcher by
    firing fake ``node_state`` messages over dashi.
    """

    def setUp(self):
        """Start the container, the Process Dispatcher app and one eeagent.

        On return the instance holds a registered process definition, a dashi
        connection, one node/eeagent pair for "engine1" and a
        ProcessStateWaiter that each test starts itself.
        """
        self.dashi = None
        self._start_container()
        from pyon.public import CFG

        self.container_client = ContainerAgentClient(node=self.container.node,
                                                     name=self.container.name)
        self.container = self.container_client._get_container_instance()

        app = dict(name="process_dispatcher",
                   processapp=("process_dispatcher",
                               "ion.services.cei.process_dispatcher_service",
                               "ProcessDispatcherService"))
        self.container.start_app(app, config=pd_config)

        self.rr_cli = self.container.resource_registry

        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        self.process_definition = ProcessDefinition(name='test_process')
        self.process_definition.executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess'
        }
        self.process_definition_id = self.pd_cli.create_process_definition(
            self.process_definition)

        # Bookkeeping for every eeagent spawned through _start_eeagent();
        # tearDown() relies on these to clean up.
        self._eea_pids = []
        self._eea_pid_to_resource_id = {}
        self._eea_pid_to_persistence_dir = {}
        self._tmpdirs = []

        self.dashi = get_dashi(
            uuid.uuid4().hex,
            pd_config['processdispatcher']['dashi_uri'],
            pd_config['processdispatcher']['dashi_exchange'],
            sysname=CFG.get_safe("dashi.sysname"))

        # send a fake node_state message to PD's dashi binding.
        self.node1_id = uuid.uuid4().hex
        self._send_node_state("engine1", self.node1_id)
        self._initial_eea_pid = self._start_eeagent(self.node1_id)

        self.waiter = ProcessStateWaiter()

    def _send_node_state(self, engine_id, node_id=None):
        """Fire a fake RUNNING ``node_state`` message at the PD over dashi.

        :param engine_id: execution engine the node belongs to; it is mapped
            to a domain id with ``domain_id_from_engine``.
        :param node_id: id of the node to announce; a random hex uuid is
            generated when omitted.
        """
        node_id = node_id or uuid.uuid4().hex
        node_state = dict(node_id=node_id,
                          state=InstanceState.RUNNING,
                          domain_id=domain_id_from_engine(engine_id))
        self.dashi.fire(get_pd_dashi_name(), "node_state", args=node_state)

    def _start_eeagent(self, node_id, resource_id=None, persistence_dir=None):
        """Spawn an ExecutionEngineAgent in the container and track it.

        :param node_id: node the agent is configured for.
        :param resource_id: agent resource id; a random hex uuid when omitted.
        :param persistence_dir: directory for the agent's persisted state.
            When omitted a fresh temp directory is created and registered for
            removal in tearDown(); a caller-supplied directory is not
            registered again.
        :returns: the pid of the spawned agent process.
        """
        if not persistence_dir:
            persistence_dir = tempfile.mkdtemp()
            self._tmpdirs.append(persistence_dir)
        resource_id = resource_id or uuid.uuid4().hex
        agent_config = _get_eeagent_config(node_id,
                                           persistence_dir,
                                           resource_id=resource_id)
        pid = self.container_client.spawn_process(
            name="eeagent",
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=agent_config)
        log.info('Agent pid=%s.', str(pid))
        self._eea_pids.append(pid)
        self._eea_pid_to_resource_id[pid] = resource_id
        self._eea_pid_to_persistence_dir[pid] = persistence_dir
        return pid

    def _kill_eeagent(self, pid):
        """Terminate a tracked eeagent and drop it from the bookkeeping.

        :param pid: pid previously returned by _start_eeagent(); the call
            fails the test if the pid is not currently tracked.
        """
        # assertIn reports both the pid and the tracked list on failure
        self.assertIn(pid, self._eea_pids)
        self.container.terminate_process(pid)
        self._eea_pids.remove(pid)
        del self._eea_pid_to_resource_id[pid]
        del self._eea_pid_to_persistence_dir[pid]

    def tearDown(self):
        """Kill every tracked eeagent and release the test's resources.

        Each cleanup stage runs even when an earlier one raises, so a failing
        agent shutdown no longer leaks temp directories, the state waiter or
        the dashi connection. The first exception still propagates.
        """
        try:
            # iterate over a copy: _kill_eeagent() mutates self._eea_pids
            for pid in list(self._eea_pids):
                self._kill_eeagent(pid)
        finally:
            # best-effort removal; a directory that is already gone must not
            # stop the remaining cleanup
            for tmpdir in self._tmpdirs:
                shutil.rmtree(tmpdir, ignore_errors=True)
            try:
                self.waiter.stop()
            finally:
                if self.dashi:
                    self.dashi.cancel()

    def test_requested_ee(self):
        """Processes targeted at a specific engine queue, reject or run."""

        # request non-default engine

        process_target = ProcessTarget(execution_engine_id="engine2")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule,
                                     process_id=pid)

        # no engine2 node exists yet, so the process has to wait
        self.waiter.await_state_event(pid, ProcessStateEnum.WAITING)

        # request unknown engine, with NEVER queueing mode. The request
        # should be rejected.
        # verifies L4-CI-CEI-RQ52

        process_target = ProcessTarget(execution_engine_id="not-a-real-ee")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        rejected_pid = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule,
                                     process_id=rejected_pid)

        self.waiter.await_state_event(rejected_pid, ProcessStateEnum.REJECTED)

        # now add a node and eeagent for engine2. original process should leave
        # queue and start running
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine2", node2_id)
        self._start_eeagent(node2_id)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # spawn another process. it should start immediately.

        process_target = ProcessTarget(execution_engine_id="engine2")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        pid2 = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule,
                                     process_id=pid2)

        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # one more with node exclusive

        process_target = ProcessTarget(execution_engine_id="engine2",
                                       node_exclusive="hats")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        pid3 = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule,
                                     process_id=pid3)

        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # kill the processes for good, one at a time
        for running_pid in (pid, pid2, pid3):
            self.pd_cli.cancel_process(running_pid)
            self.waiter.await_state_event(running_pid,
                                          ProcessStateEnum.TERMINATED)

    def test_node_exclusive(self):
        """Processes sharing a node_exclusive tag never share a node."""

        # the node_exclusive constraint is used to ensure multiple processes
        # of the same "kind" each get a VM exclusive of each other. Other
        # processes may run on these VMs, just not processes with the same
        # node_exclusive tag. Since we cannot directly query the contents
        # of each node in this test, we prove the capability by scheduling
        # processes one by one and checking their state.

        # verifies L4-CI-CEI-RQ121
        # verifies L4-CI-CEI-RQ57

        # first off, setUp() created a single node and eeagent.
        # We schedule two processes with the same "abc" node_exclusive
        # tag. Since there is only one node, the first process should run
        # and the second should be queued.

        process_target = ProcessTarget(execution_engine_id="engine1")
        process_target.node_exclusive = "abc"
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        pid1 = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule,
                                     process_id=pid1)

        self.waiter.await_state_event(pid1, ProcessStateEnum.RUNNING)

        pid2 = self.pd_cli.create_process(self.process_definition_id)
        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule,
                                     process_id=pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.WAITING)

        # now demonstrate that the node itself is not full by launching
        # a third process without a node_exclusive tag -- it should start
        # immediately

        # process_schedule.target is this same object, so clearing the tag
        # here changes the schedule used for pid3
        process_target.node_exclusive = None
        pid3 = self.pd_cli.create_process(self.process_definition_id)
        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule,
                                     process_id=pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # finally, add a second node to the engine. pid2 should be started
        # since there is an exclusive "abc" node free.
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine1", node2_id)
        self._start_eeagent(node2_id)
        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # kill the processes for good, one at a time
        for running_pid in (pid1, pid2, pid3):
            self.pd_cli.cancel_process(running_pid)
            self.waiter.await_state_event(running_pid,
                                          ProcessStateEnum.TERMINATED)

    def test_code_download(self):
        """A definition without a URL fails; the same one with a URL runs."""
        # create a process definition that has no URL; only module and class.
        process_definition_no_url = ProcessDefinition(
            name='test_process_nodownload')
        process_definition_no_url.executable = {
            'module': 'ion.my.test.process',
            'class': 'TestProcess'
        }
        process_definition_id_no_url = self.pd_cli.create_process_definition(
            process_definition_no_url)

        # create another that has a URL of the python file
        # (test_process_dispatcher.py next to this module)
        # verifies L4-CI-CEI-RQ114
        url = "file://%s" % os.path.join(os.path.dirname(__file__),
                                         'test_process_dispatcher.py')
        process_definition = ProcessDefinition(name='test_process_download')
        process_definition.executable = {
            'module': 'ion.my.test.process',
            'class': 'TestProcess',
            'url': url
        }
        process_definition_id = self.pd_cli.create_process_definition(
            process_definition)

        process_target = ProcessTarget()
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        self.waiter.start()

        # Test a module with no download fails
        pid_no_url = self.pd_cli.create_process(process_definition_id_no_url)

        self.pd_cli.schedule_process(process_definition_id_no_url,
                                     process_schedule,
                                     process_id=pid_no_url)

        self.waiter.await_state_event(pid_no_url, ProcessStateEnum.FAILED)

        # Test a module with a URL runs
        pid = self.pd_cli.create_process(process_definition_id)

        self.pd_cli.schedule_process(process_definition_id,
                                     process_schedule,
                                     process_id=pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

    def _add_test_process(self, restart_mode=None):
        """Create and schedule one test process with its own listen name.

        :param restart_mode: value for ``ProcessSchedule.restart_mode``; the
            schedule's own default is kept when None.
        :returns: ``(pid, client)`` where ``client`` is a TestClient
            addressed to the process's unique listen name.
        """
        process_schedule = ProcessSchedule()
        if restart_mode is not None:
            process_schedule.restart_mode = restart_mode
        pid = self.pd_cli.create_process(self.process_definition_id)

        pid_listen_name = "PDtestproc_%s" % uuid.uuid4().hex
        config = {'process': {'listen_name': pid_listen_name}}

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule,
                                     process_id=pid,
                                     configuration=config)

        client = TestClient(to_name=pid_listen_name)
        return pid, client

    def test_restart(self):
        """Only ALWAYS/ABNORMAL processes come back after an eeagent restart."""
        self.waiter.start()

        restartable_pids = []
        nonrestartable_pids = []
        clients = {}
        # start 10 processes with RestartMode.ALWAYS
        for _ in range(10):
            pid, client = self._add_test_process(ProcessRestartMode.ALWAYS)
            restartable_pids.append(pid)
            clients[pid] = client

        # and 10 processes with RestartMode.ABNORMAL
        for _ in range(10):
            pid, client = self._add_test_process(ProcessRestartMode.ABNORMAL)
            restartable_pids.append(pid)
            clients[pid] = client

        # and 10 with RestartMode.NEVER
        for _ in range(10):
            pid, client = self._add_test_process(ProcessRestartMode.NEVER)
            nonrestartable_pids.append(pid)
            clients[pid] = client

        all_pids = restartable_pids + nonrestartable_pids

        self.waiter.await_many_state_events(all_pids, ProcessStateEnum.RUNNING)

        for pid in all_pids:
            client = clients[pid]
            self.assertFalse(client.is_restart())
            self.assertEqual(client.count(), 1)

        # now kill the whole eeagent and restart it. processes should
        # show up as FAILED in the next heartbeat.
        # Look these up first: _kill_eeagent() deletes both mappings.
        resource_id = self._eea_pid_to_resource_id[self._initial_eea_pid]
        persistence_dir = self._eea_pid_to_persistence_dir[
            self._initial_eea_pid]
        log.debug("Restarting eeagent %s", self._initial_eea_pid)
        self._kill_eeagent(self._initial_eea_pid)

        # manually kill the processes to simulate a real container failure
        for pid in all_pids:
            self.container.terminate_process(pid)

        # same resource id and persistence directory as the killed agent
        self._start_eeagent(self.node1_id,
                            resource_id=resource_id,
                            persistence_dir=persistence_dir)

        # wait for restartables to restart
        self.waiter.await_many_state_events(restartable_pids,
                                            ProcessStateEnum.RUNNING)

        # query the processes again. it should have restart mode config
        for pid in restartable_pids:
            client = clients[pid]
            self.assertTrue(client.is_restart())
            self.assertEqual(client.count(), 1)

        # meanwhile some procs should not have restarted
        for pid in nonrestartable_pids:
            proc = self.pd_cli.read_process(pid)
            self.assertEqual(proc.process_state, ProcessStateEnum.FAILED)

        # guard against extraneous events we were receiving as part of a bug:
        # processes restarting again after they were already restarted
        self.waiter.await_nothing(timeout=5)

    def test_idempotency(self):
        """Repeating schedule and cancel calls for one pid is harmless."""
        # ensure every operation can be safely retried
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        proc_name = 'myreallygoodname'
        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start(pid)

        # note: if we import UNSCHEDULED state into ProcessStateEnum,
        # this assertion will need to change.
        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_state, ProcessStateEnum.REQUESTED)

        pid2 = self.pd_cli.schedule_process(self.process_definition_id,
                                            process_schedule,
                                            configuration={},
                                            process_id=pid,
                                            name=proc_name)
        self.assertEqual(pid, pid2)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # repeating schedule is harmless
        pid2 = self.pd_cli.schedule_process(self.process_definition_id,
                                            process_schedule,
                                            configuration={},
                                            process_id=pid,
                                            name=proc_name)
        self.assertEqual(pid, pid2)

        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_configuration, {})
        self.assertEqual(proc.process_state, ProcessStateEnum.RUNNING)

        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)

        # repeating cancel is harmless
        self.pd_cli.cancel_process(pid)
        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_configuration, {})
        self.assertEqual(proc.process_state, ProcessStateEnum.TERMINATED)
Beispiel #12
0
class HeartbeaterIntTest(IonIntegrationTestCase):
    """Integration test for the eeagent's heartbeat start-up ordering."""

    @needs_eeagent
    def setUp(self):
        """Start the container and prepare (but do not spawn) the eeagent."""
        self._start_container()

        self.resource_id = "eeagent_123456789"
        self._eea_name = "eeagent"

        # The agent is only spawned from inside the test body; keep a
        # defined value so tearDown() works when the test fails earlier.
        self._eea_pid = None

        self.persistence_directory = tempfile.mkdtemp()

        self.agent_config = {
            'eeagent': {
                'heartbeat': 300,
                'slots': 100,
                'name': 'pyon_eeagent',
                'launch_type': {
                    'name': 'pyon',
                    'persistence_directory': self.persistence_directory,
                }
            },
            'agent': {
                'resource_id': self.resource_id
            },
            'logging': {
                'loggers': {
                    'eeagent': {
                        'level': 'DEBUG',
                        'handlers': ['console']
                    }
                },
                'root': {
                    'handlers': ['console']
                },
            }
        }

    def _start_eeagent(self):
        """Spawn the eeagent in the container and build clients for it.

        Sets ``self._eea_pid``, ``self._eea_pyon_client`` and
        ``self.eea_client``.
        """
        self.container_client = ContainerAgentClient(node=self.container.node,
                                                     name=self.container.name)
        self.container = self.container_client._get_container_instance()

        self._eea_pid = self.container_client.spawn_process(
            name=self._eea_name,
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=self.agent_config)
        log.info('Agent pid=%s.', str(self._eea_pid))

        # Start a resource agent client to talk with the execution engine
        # agent (addressed by the resource id it was configured with).
        self._eea_pyon_client = SimpleResourceAgentClient(
            self.resource_id, process=FakeProcess())
        log.info('Got eea client %s.', str(self._eea_pyon_client))

        self.eea_client = ExecutionEngineAgentClient(self._eea_pyon_client)

    def tearDown(self):
        """Terminate the eeagent if it was started and remove its temp dir.

        The persistence directory is removed even when terminating the agent
        raises, and no terminate is attempted when the agent never started.
        """
        try:
            if self._eea_pid is not None:
                self.container.terminate_process(self._eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    @needs_eeagent
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_heartbeater(self):
        """test_heartbeater

        Test whether the eeagent waits until the eeagent listener is ready before sending
        a heartbeat to the PD
        """

        beat_died = threading.Event()
        beat_succeeded = threading.Event()

        def heartbeat_callback(heartbeat, headers):
            # Call straight back into the agent named in the heartbeat; if
            # its listener is not up yet, dump_state() fails.
            eeagent_id = heartbeat['eeagent_id']
            agent_client = SimpleResourceAgentClient(eeagent_id,
                                                     name=eeagent_id,
                                                     process=FakeProcess())
            ee_client = ExecutionEngineAgentClient(agent_client, timeout=10)

            try:
                ee_client.dump_state()
                beat_succeeded.set()
            except:
                # Kept broad on purpose: any failure to reach the agent
                # counts as a dead beat and is logged.
                log.exception("Heartbeat Failed!")
                beat_died.set()

        # NOTE(review): the subscriber is never stopped by this test or by
        # tearDown(); confirm whether HeartbeatSubscriber needs explicit
        # shutdown.
        self.beat_subscriber = HeartbeatSubscriber("heartbeat_queue",
                                                   callback=heartbeat_callback,
                                                   node=self.container.node)
        self.beat_subscriber.start()

        self._start_eeagent()
        success = beat_succeeded.wait(20)
        # NOTE(review): if neither event fires within the two waits, the
        # test passes without ever having seen a heartbeat -- confirm that
        # this is intended.
        if success is False:
            died = beat_died.wait(20)
            assert died is False, "A Hearbeat callback wasn't able to contact the eeagent"
Beispiel #13
0
class ExecutionEngineAgentPyonIntTest(IonIntegrationTestCase):
    from ion.agents.cei.execution_engine_agent import ExecutionEngineAgentClient

    def setUp(self):
        """Bring up the container and CEI deployment, then spawn the eeagent.

        The agent configuration (heartbeat 0, 100 slots, pyon launch type
        persisting into a fresh temp directory) is stored on
        ``self.agent_config`` before the agent is started.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')

        self.resource_id = "eeagent_123456789"
        self._eea_name = "eeagent"
        self.persistence_directory = tempfile.mkdtemp()

        # assemble the configuration section by section
        launch_type = dict(name='pyon',
                           persistence_directory=self.persistence_directory)
        eeagent_section = dict(heartbeat=0,
                               slots=100,
                               name='pyon_eeagent',
                               launch_type=launch_type)
        logging_section = dict(
            loggers=dict(eeagent=dict(level='DEBUG', handlers=['console'])),
            root=dict(handlers=['console']))

        self.agent_config = dict(eeagent=eeagent_section,
                                 agent=dict(resource_id=self.resource_id),
                                 logging=logging_section)

        self._start_eeagent()

    def _start_eeagent(self):
        """Spawn the eeagent process and create the clients used by the tests.

        Stores the new pid on ``self._eea_pid`` and the clients on
        ``self._eea_pyon_client`` and ``self.eea_client``.
        """
        container = self.container
        self.container_client = ContainerAgentClient(node=container.node,
                                                     name=container.name)
        self.container = self.container_client._get_container_instance()

        # Launch the agent inside the container.
        spawn_kwargs = dict(name=self._eea_name,
                            module="ion.agents.cei.execution_engine_agent",
                            cls="ExecutionEngineAgent",
                            config=self.agent_config)
        self._eea_pid = self.container_client.spawn_process(**spawn_kwargs)
        log.info('Agent pid=%s.', str(self._eea_pid))

        # Resource agent client addressed by the agent's resource id,
        # wrapped in the eeagent-specific client.
        pyon_client = ResourceAgentClient(self.resource_id,
                                          process=FakeProcess())
        self._eea_pyon_client = pyon_client
        log.info('Got eea client %s.', str(self._eea_pyon_client))

        self.eea_client = ExecutionEngineAgentClient(self._eea_pyon_client)

    def tearDown(self):
        """Terminate the eeagent and always remove its persistence directory.

        A test may fail after it has already terminated the agent itself;
        terminating again can then raise. The ``finally`` clause guarantees
        the temp directory is removed in that case too, while the original
        error still propagates.
        """
        try:
            self.container.terminate_process(self._eea_pid)
        finally:
            shutil.rmtree(self.persistence_directory)

    @needs_eeagent
    def test_basics(self):
        """Launch one pyon process via the eeagent, check it runs, stop it."""
        upid, launch_round = "test0", 0
        launch_params = dict(name='test_x',
                             module='ion.agents.cei.test.test_eeagent',
                             cls='TestProcess')

        self.eea_client.launch_process(upid, launch_round, "pyon",
                                       launch_params)
        running = get_proc_for_upid(self.eea_client.dump_state().result, upid)

        self.assertIsNotNone(running,
                             "There is no state retrieved from eeagent")
        self.assertEqual(running.get('state'), [500, 'RUNNING'])

        self.eea_client.terminate_process(upid, launch_round)
        # The state is read back once more after termination; as before,
        # nothing is asserted on it.
        get_proc_for_upid(self.eea_client.dump_state().result, upid)

    @needs_eeagent
    def test_kill_and_revive(self):
        """test_kill_and_revive
        Ensure that when an eeagent dies, it pulls the processes it owned from
        persistence, and marks them as failed, so the PD can figure out what to
        do with them.

        The test launches one process, kills both the agent and that process,
        restarts the agent with the same configuration and expects the process
        to be reported as FAILED.
        """
        u_pid = "test0"
        round = 0
        run_type = "pyon"
        proc_name = 'test_transform'
        module = 'ion.agents.cei.test.test_eeagent'
        cls = 'TestProcess'
        parameters = {'name': proc_name, 'module': module, 'cls': cls}
        self.eea_client.launch_process(u_pid, round, run_type, parameters)
        state = self.eea_client.dump_state().result
        proc = get_proc_for_upid(state, u_pid)

        # sanity check: the process is up before anything is killed
        self.assertIsNotNone(proc, "There is no state retrieved from eeagent")
        self.assertEqual(proc.get('state'), [500, 'RUNNING'])

        # Kill and restart eeagent. Also, kill proc started by eea to simulate
        # a killed container
        old_eea_pid = str(self._eea_pid)
        self.container.terminate_process(self._eea_pid)
        # the launched process is looked up in the container by its name
        proc_to_kill = self.container.proc_manager.procs_by_name.get(proc_name)
        self.assertIsNotNone(proc_to_kill)
        self.container.terminate_process(proc_to_kill.id)

        # respawn the agent; it reuses self.agent_config and therefore the
        # same persistence directory
        self._start_eeagent()

        self.assertNotEqual(old_eea_pid, self._eea_pid)

        state = self.eea_client.dump_state().result
        proc = get_proc_for_upid(state, u_pid)

        # the revived agent must still know the process and report it FAILED
        self.assertIsNotNone(proc, "There is no state retrieved from eeagent")
        self.assertEqual(proc.get('state'), [850, 'FAILED'])