def setUp(self):
    """Deploy the CEI services and spawn a High Availability Agent.

    Steps, in order: start the container and the ``r2cei`` deployment,
    register a process definition under a freshly generated id, build the
    HA agent configuration (``npreserving`` policy with ``preserve_n`` of
    0, dashi messaging enabled), snapshot the services and processes that
    exist before the agent is spawned, start the process state waiter,
    spawn the agent and create a client for it.
    """
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2cei.yml')

    # The dispatcher is addressed by its service name.
    self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

    # Process definition the HA agent is configured with.
    self.process_definition_id = uuid4().hex
    self.process_definition_name = 'test'
    executable = {
        'module': 'ion.agents.cei.test.test_haagent',
        'class': 'TestProcess',
    }
    self.process_definition = ProcessDefinition(
        name=self.process_definition_name, executable=executable)
    self.pd_cli.create_process_definition(
        self.process_definition, self.process_definition_id)

    # Identity and messaging settings of the HA agent.
    self.resource_id = "haagent_1234"
    self._haa_name = "high_availability_agent"
    self._haa_dashi_name = "dashi_haa_" + uuid4().hex
    self._haa_dashi_uri = get_dashi_uri_from_cfg()
    self._haa_dashi_exchange = "%s.hatests" % bootstrap.get_sys_name()

    policy = {
        'interval': 1,
        'name': 'npreserving',
        'parameters': {'preserve_n': 0},
    }
    ha_section = {
        'policy': policy,
        'process_definition_id': self.process_definition_id,
        'dashi_messaging': True,
        'dashi_exchange': self._haa_dashi_exchange,
        'dashi_name': self._haa_dashi_name,
    }
    self._haa_config = {
        'highavailability': ha_section,
        'agent': {'resource_id': self.resource_id},
    }

    # Snapshot what already exists before the agent is spawned.
    registry = self.container.resource_registry
    self._base_services, _unused = registry.find_resources(
        restype="Service", name=self.process_definition_name)
    self._base_procs = self.pd_cli.list_processes()

    self.waiter = ProcessStateWaiter()
    self.waiter.start()

    self.container_client = ContainerAgentClient(
        node=self.container.node, name=self.container.name)
    self._haa_pid = self.container_client.spawn_process(
        name=self._haa_name,
        module="ion.agents.cei.high_availability_agent",
        cls="HighAvailabilityAgent",
        config=self._haa_config)

    # Start a resource agent client to talk with the HA agent.
    self._haa_pyon_client = SimpleResourceAgentClient(
        self.resource_id, process=FakeProcess())
    log.info('Got haa client %s.', str(self._haa_pyon_client))

    self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
def setUp(self):
    """Deploy the CEI services and register the test process definition.

    Leaves a resource registry client, a process dispatcher client, the
    registered definition (and its id) and an unstarted
    ``ProcessStateWaiter`` on the test instance.
    """
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2cei.yml')

    self.rr_cli = ResourceRegistryServiceClient()
    self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    definition = ProcessDefinition(name='test_process')
    definition.executable = {
        'module': 'ion.services.cei.test.test_process_dispatcher',
        'class': 'TestProcess',
    }
    self.process_definition = definition
    self.process_definition_id = self.pd_cli.create_process_definition(
        definition)

    # The waiter is only created here; it is not started in setUp.
    self.waiter = ProcessStateWaiter()
def setUp(self):
    """Start a process dispatcher app plus one execution engine agent.

    Instead of a full deployment file, the process dispatcher is started
    as a single app using ``pd_config``. A dashi connection is opened with
    the URI and exchange from that config, a fake ``node_state`` message
    announces one node for ``engine1`` and an eeagent is started for that
    node.
    """
    # Set first, before anything that can fail.
    self.dashi = None
    self._start_container()

    self.container_client = ContainerAgentClient(
        node=self.container.node, name=self.container.name)
    self.container = self.container_client._get_container_instance()

    pd_app = {
        'name': "process_dispatcher",
        'processapp': ("process_dispatcher",
                       "ion.services.cei.process_dispatcher_service",
                       "ProcessDispatcherService"),
    }
    self.container.start_app(pd_app, config=pd_config)

    self.rr_cli = self.container.resource_registry
    self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    definition = ProcessDefinition(name='test_process')
    definition.executable = {
        'module': 'ion.services.cei.test.test_process_dispatcher',
        'class': 'TestProcess',
    }
    self.process_definition = definition
    self.process_definition_id = self.pd_cli.create_process_definition(
        definition)

    # Bookkeeping filled in by _start_eeagent().
    self._eea_pids = []
    self._tmpdirs = []

    dashi_name = uuid.uuid4().hex
    pd_section = pd_config['processdispatcher']
    self.dashi = get_dashi(dashi_name,
                           pd_section['dashi_uri'],
                           pd_section['dashi_exchange'])

    # send a fake node_state message to PD's dashi binding.
    self.node1_id = uuid.uuid4().hex
    self._send_node_state("engine1", self.node1_id)
    self._start_eeagent(self.node1_id)

    self.waiter = ProcessStateWaiter()
def setUp(self):
    """Deploy the CEI services and spawn a High Availability Agent.

    Registers a uniquely named process definition and a matching service
    definition, builds the agent config via ``_get_haagent_config()``,
    snapshots the services and processes present before the agent is
    spawned, then spawns the agent, registers ``_stop_haagent`` as a
    cleanup and sets up the agent client.
    """
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2cei.yml')

    # The dispatcher is addressed by its service name.
    self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

    # The definition name embeds the generated id.
    definition_id = uuid4().hex
    self.process_definition_id = definition_id
    self.process_definition_name = 'test_haagent_%s' % definition_id
    executable = {
        'module': 'ion.agents.cei.test.test_haagent',
        'class': 'TestProcess',
    }
    self.process_definition = ProcessDefinition(
        name=self.process_definition_name, executable=executable)
    self.pd_cli.create_process_definition(
        self.process_definition, self.process_definition_id)

    # Service definition carrying the same name as the process definition.
    registry = self.container.resource_registry
    service_definition = SERVICE_DEFINITION_TMPL % self.process_definition_name
    sd_fields = {
        "name": self.process_definition_name,
        "definition": service_definition,
    }
    sd = IonObject(RT.ServiceDefinition, sd_fields)
    self.service_def_id, _unused = registry.create(sd)

    # These attributes are assigned before _get_haagent_config() runs.
    self.resource_id = "haagent_1234"
    self._haa_name = "high_availability_agent"
    self._haa_dashi_name = "dashi_haa_" + uuid4().hex
    self._haa_dashi_uri = get_dashi_uri_from_cfg()
    self._haa_dashi_exchange = "hatests"
    self._haa_config = self._get_haagent_config()

    # Snapshot what already exists before the agent is spawned.
    self._base_services, _unused = registry.find_resources(
        restype="Service", name=self.process_definition_name)
    self._base_procs = self.pd_cli.list_processes()

    self.waiter = ProcessStateWaiter()
    self.waiter.start()

    self.container_client = ContainerAgentClient(
        node=self.container.node, name=self.container.name)

    self._spawn_haagent()
    # Registered after the spawn call has returned.
    self.addCleanup(self._stop_haagent)

    self._setup_haa_client()
def setUp(self):
    """Start a process dispatcher app plus one execution engine agent.

    The process dispatcher is started as a single app using ``pd_config``.
    A dashi connection is opened with the URI and exchange from that
    config and the ``dashi.sysname`` value from ``CFG``. A fake
    ``node_state`` message announces one node for ``engine1`` and an
    eeagent is started for it; the value returned by ``_start_eeagent``
    is kept in ``_initial_eea_pid``.
    """
    # Set first, before anything that can fail.
    self.dashi = None
    self._start_container()

    # Imported here, after the container has been started.
    from pyon.public import CFG

    self.container_client = ContainerAgentClient(
        node=self.container.node, name=self.container.name)
    self.container = self.container_client._get_container_instance()

    pd_app = {
        'name': "process_dispatcher",
        'processapp': ("process_dispatcher",
                       "ion.services.cei.process_dispatcher_service",
                       "ProcessDispatcherService"),
    }
    self.container.start_app(pd_app, config=pd_config)

    self.rr_cli = self.container.resource_registry
    self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    definition = ProcessDefinition(name='test_process')
    definition.executable = {
        'module': 'ion.services.cei.test.test_process_dispatcher',
        'class': 'TestProcess',
    }
    self.process_definition = definition
    self.process_definition_id = self.pd_cli.create_process_definition(
        definition)

    # Bookkeeping for the eeagents started by this test.
    self._eea_pids = []
    self._eea_pid_to_resource_id = {}
    self._eea_pid_to_persistence_dir = {}
    self._tmpdirs = []

    dashi_name = uuid.uuid4().hex
    pd_section = pd_config['processdispatcher']
    self.dashi = get_dashi(dashi_name,
                           pd_section['dashi_uri'],
                           pd_section['dashi_exchange'],
                           sysname=CFG.get_safe("dashi.sysname"))

    # send a fake node_state message to PD's dashi binding.
    self.node1_id = uuid.uuid4().hex
    self._send_node_state("engine1", self.node1_id)
    self._initial_eea_pid = self._start_eeagent(self.node1_id)

    self.waiter = ProcessStateWaiter()
def setUp(self):
    """Deploy the CEI services and register the test process definition.

    Leaves a process dispatcher client, the registered definition (and
    its id) and an unstarted ``ProcessStateWaiter`` on the test instance.
    """
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2cei.yml')

    self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    definition = ProcessDefinition(name='test_process')
    definition.executable = {
        'module': 'ion.services.cei.test.test_process_dispatcher',
        'class': 'TestProcess',
    }
    self.process_definition = definition
    self.process_definition_id = self.pd_cli.create_process_definition(
        definition)

    # The waiter is only created here; it is not started in setUp.
    self.waiter = ProcessStateWaiter()
def setUp(self):
    """Bring up the CEI deployment and one High Availability Agent.

    Registers a process definition under a generated id, assembles the
    agent config (``npreserving`` policy, ``preserve_n`` of 0, dashi
    messaging on), records the services and processes present before the
    agent is spawned, starts the state waiter, spawns the agent and wraps
    a resource agent client for it.
    """
    self._start_container()
    self.container.start_rel_from_url("res/deploy/r2cei.yml")

    # Client bound to the dispatcher's service name.
    self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

    self.process_definition_id = uuid4().hex
    self.process_definition_name = "test"
    test_executable = {
        "module": "ion.agents.cei.test.test_haagent",
        "class": "TestProcess",
    }
    self.process_definition = ProcessDefinition(
        name=self.process_definition_name, executable=test_executable)
    self.pd_cli.create_process_definition(
        self.process_definition, self.process_definition_id)

    self.resource_id = "haagent_1234"
    self._haa_name = "high_availability_agent"
    self._haa_dashi_name = "dashi_haa_" + uuid4().hex
    self._haa_dashi_uri = get_dashi_uri_from_cfg()
    self._haa_dashi_exchange = "%s.hatests" % bootstrap.get_sys_name()

    # Assemble the agent configuration piece by piece.
    npreserving_policy = {
        "interval": 1,
        "name": "npreserving",
        "parameters": {"preserve_n": 0},
    }
    highavailability = {
        "policy": npreserving_policy,
        "process_definition_id": self.process_definition_id,
        "dashi_messaging": True,
        "dashi_exchange": self._haa_dashi_exchange,
        "dashi_name": self._haa_dashi_name,
    }
    agent = {"resource_id": self.resource_id}
    self._haa_config = {"highavailability": highavailability, "agent": agent}

    # Baseline taken before the agent exists.
    rr = self.container.resource_registry
    self._base_services, _unused = rr.find_resources(
        restype="Service", name=self.process_definition_name)
    self._base_procs = self.pd_cli.list_processes()

    self.waiter = ProcessStateWaiter()
    self.waiter.start()

    self.container_client = ContainerAgentClient(
        node=self.container.node, name=self.container.name)
    self._haa_pid = self.container_client.spawn_process(
        name=self._haa_name,
        module="ion.agents.cei.high_availability_agent",
        cls="HighAvailabilityAgent",
        config=self._haa_config)

    # Start a resource agent client to talk with the HA agent.
    self._haa_pyon_client = SimpleResourceAgentClient(
        self.resource_id, process=FakeProcess())
    log.info("Got haa client %s.", str(self._haa_pyon_client))

    self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
def setUp(self):
    """Bring up the CEI deployment and one High Availability Agent.

    A uniquely named process definition and a service definition with the
    same name are registered first. The agent config comes from
    ``_get_haagent_config()``. Existing services and processes are
    recorded before ``_spawn_haagent()`` runs; ``_stop_haagent`` is
    registered as a cleanup and the agent client is set up last.
    """
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2cei.yml')

    # Client bound to the dispatcher's service name.
    self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

    self.process_definition_id = uuid4().hex
    self.process_definition_name = (
        'test_haagent_%s' % self.process_definition_id)
    test_executable = {
        'module': 'ion.agents.cei.test.test_haagent',
        'class': 'TestProcess',
    }
    self.process_definition = ProcessDefinition(
        name=self.process_definition_name, executable=test_executable)
    self.pd_cli.create_process_definition(
        self.process_definition, self.process_definition_id)

    # Register a service definition named like the process definition.
    rr = self.container.resource_registry
    service_definition = SERVICE_DEFINITION_TMPL % self.process_definition_name
    sd = IonObject(
        RT.ServiceDefinition,
        {"name": self.process_definition_name,
         "definition": service_definition})
    self.service_def_id, _unused = rr.create(sd)

    # Assigned before _get_haagent_config() is called.
    self.resource_id = "haagent_1234"
    self._haa_name = "high_availability_agent"
    self._haa_dashi_name = "dashi_haa_" + uuid4().hex
    self._haa_dashi_uri = get_dashi_uri_from_cfg()
    self._haa_dashi_exchange = "hatests"
    self._haa_config = self._get_haagent_config()

    # Baseline taken before the agent exists.
    self._base_services, _unused = rr.find_resources(
        restype="Service", name=self.process_definition_name)
    self._base_procs = self.pd_cli.list_processes()

    self.waiter = ProcessStateWaiter()
    self.waiter.start()

    self.container_client = ContainerAgentClient(
        node=self.container.node, name=self.container.name)

    self._spawn_haagent()
    self.addCleanup(self._stop_haagent)

    self._setup_haa_client()
class HighAvailabilityAgentSensorPolicyTest(IonIntegrationTestCase):
    """Integration test for the High Availability Agent ``sensor`` policy.

    The agent is configured with a ``trafficsentinel`` server at
    ``localhost`` on the port of a small HTTP server started by this test.
    The test controls what that server returns (lines of the form
    ``pid=<upid>&ml=<value>``) and checks how many processes are running
    afterwards.
    """

    def _start_webserver(self, port=None):
        """Start a local HTTP server that answers every GET with a canned body.

        The body is ``self._webserver.response`` (see ``_set_response``).
        If the requested port cannot be bound, random ports between 8000
        and 10000 are tried, up to 100 attempts in total.

        :param port: preferred port; 8008 when ``None``
        :returns: the port the server is actually bound to
        :raises Exception: if no port could be bound after 100 attempts
        """

        class TestRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
            server_version = 'test_server'
            extensions_map = ''

            def log_message(self, format, *args):
                # swallow log messages
                pass

            def do_GET(self):
                self.send_response(200)
                self.send_header("Content-type", "text/plain")
                self.send_header("Content-Length", len(self.server.response))
                self.end_headers()
                self.wfile.write(self.server.response)

        class Server(HTTPServer):
            response = ''

            def serve_forever(self):
                # Handle one request at a time until stop() clears the flag.
                self._serving = 1
                while self._serving:
                    self.handle_request()

            def stop(self):
                self._serving = 0

        if port is None:
            port = 8008

        for _attempt in range(100):
            try:
                self._webserver = Server(("localhost", port), TestRequestHandler)
            except socket.error:
                print("port %s is in use, picking another" % port)
                port = randint(8000, 10000)
            else:
                break
        else:
            # Fail with a clear message instead of an AttributeError on
            # self._webserver a few lines further down.
            raise Exception("Could not bind the test webserver after 100 attempts")

        self._web_glet = gevent.spawn(self._webserver.serve_forever)
        return port

    def _stop_webserver(self):
        """Stop the test webserver and release its listening socket.

        Does nothing if the server was never started or is already stopped.
        """
        webserver = getattr(self, '_webserver', None)
        if webserver is None:
            return

        webserver.stop()
        # stop() only clears a flag that serve_forever() checks between
        # requests, so wait a little and then kill the greenlet.
        gevent.sleep(2)
        self._web_glet.kill()
        # Close the listening socket so the port is free for later tests.
        webserver.server_close()
        self._webserver = None

    def await_ha_state(self, want_state, timeout=20):
        """Poll the HA agent until it reports ``want_state``.

        The status is checked up to ``timeout`` times, with a one second
        pause after every unsuccessful check. Errors while fetching the
        status are logged and the check is retried.

        :param want_state: status string to wait for
        :param timeout: number of one-second polling rounds
        :raises Exception: if the state is not reached in time
        """
        for _round in range(0, timeout):
            try:
                status = self.haa_client.status().result
                if status == want_state:
                    return
                num_procs = len(self.get_running_procs())
                log.debug("assert wants state %s, got state %s, with %s procs" % (want_state, status, num_procs))
            except Exception:
                log.exception("Problem getting HA status, trying again...")
            gevent.sleep(1)

        raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state))

    @needs_epu
    def setUp(self):
        """Deploy the CEI services, the test webserver and the HA agent.

        The agent is spawned with the ``sensor`` policy (metric
        ``app_attributes:ml``, between 1 and 5 processes, scale up above
        2.0, scale down below 1.0) and pointed at the local webserver.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')

        self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

        self.process_definition_id = uuid4().hex
        self.process_definition = ProcessDefinition(name='test', executable={
            'module': 'ion.agents.cei.test.test_haagent',
            'class': 'TestProcess'
        })
        self.pd_cli.create_process_definition(self.process_definition,
                                              self.process_definition_id)

        # 8919 is only the preferred port; _start_webserver() returns the
        # port that was actually bound.
        http_port = 8919
        http_port = self._start_webserver(port=http_port)

        self.resource_id = "haagent_4567"
        self._haa_name = "high_availability_agent"
        self._haa_config = {
            'server': {
                'trafficsentinel': {
                    'host': 'localhost',
                    'port': http_port,
                    'protocol': 'http',
                    'username': '******',
                    'password': '******'
                }
            },
            'highavailability': {
                'policy': {
                    'interval': 1,
                    'name': 'sensor',
                    'parameters': {
                        'metric': 'app_attributes:ml',
                        'sample_period': 600,
                        'sample_function': 'Average',
                        'cooldown_period': 5,
                        'scale_up_threshold': 2.0,
                        'scale_up_n_processes': 1,
                        'scale_down_threshold': 1.0,
                        'scale_down_n_processes': 1,
                        'maximum_processes': 5,
                        'minimum_processes': 1,
                    }
                },
                'process_definition_id': self.process_definition_id,
                "process_dispatchers": ['process_dispatcher']
            },
            'agent': {'resource_id': self.resource_id},
        }

        # Processes that exist before the agent is spawned; they are
        # filtered out by get_running_procs().
        self._base_procs = self.pd_cli.list_processes()

        self.waiter = ProcessStateWaiter()
        self.waiter.start()

        self.container_client = ContainerAgentClient(node=self.container.node,
                                                     name=self.container.name)
        self._haa_pid = self.container_client.spawn_process(
            name=self._haa_name,
            module="ion.agents.cei.high_availability_agent",
            cls="HighAvailabilityAgent",
            config=self._haa_config)

        # Start a resource agent client to talk with the HA agent.
        self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id,
                                                          process=FakeProcess())
        log.info('Got haa client %s.', str(self._haa_pyon_client))

        self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)

    def tearDown(self):
        """Scale the agent down to zero processes and release everything.

        The waiter, the HA agent process, the webserver and the container
        are shut down even when the scale-down wait or assertion fails.
        """
        new_policy = {
            'metric': 'app_attributes:ml',
            'sample_period': 600,
            'sample_function': 'Average',
            'cooldown_period': 0,
            'scale_up_threshold': 2.0,
            'scale_up_n_processes': 1,
            'scale_down_threshold': 1.0,
            'scale_down_n_processes': 1,
            'maximum_processes': 0,
            'minimum_processes': 0,
        }
        try:
            self.haa_client.reconfigure_policy(new_policy)

            self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)
            self.assertEqual(len(self.get_running_procs()), 0)
        finally:
            self.waiter.stop()
            self.container.terminate_process(self._haa_pid)
            self._stop_webserver()
            self._stop_container()

    def get_running_procs(self):
        """returns a normalized set of running procs (removes the ones that
        were there at setup time)
        """
        base_pids = [proc.process_id for proc in self._base_procs]
        current = self.pd_cli.list_processes()
        current_pids = [proc.process_id for proc in current]
        print("filtering base procs %s from %s" % (base_pids, current_pids))
        return [cproc for cproc in current
                if cproc.process_id not in base_pids
                and cproc.process_state == ProcessStateEnum.RUNNING]

    def _get_managed_upids(self):
        """Return the ``managed_upids`` entry of the agent's dump."""
        result = self.haa_client.dump().result
        return result['managed_upids']

    def _set_response(self, response):
        """Set the body the test webserver returns for every GET."""
        self._webserver.response = response

    def _report_ml(self, ml):
        """Serve one ``pid=<upid>&ml=<ml>`` line per managed process.

        :param ml: value placed after ``ml=``, formatted with ``%s``
        """
        upids = self._get_managed_upids()
        self._set_response(
            "".join("pid=%s&ml=%s\n" % (upid, ml) for upid in upids))

    def test_sensor_policy(self):
        status = self.haa_client.status().result
        # Ensure HA hasn't already failed
        self.assertIn(status, ('PENDING', 'READY', 'STEADY'))

        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)

        self.assertEqual(len(self.get_running_procs()), 1)

        self.await_ha_state('STEADY')

        # Set ml for each proc such that we scale up
        self._report_ml('5')

        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)

        self.assertEqual(len(self.get_running_procs()), 2)

        # Set ml so we stay steady
        self._report_ml('1.5')

        self.assertEqual(len(self.get_running_procs()), 2)

        self.await_ha_state('STEADY')

        # Set ml so we scale down
        self._report_ml('0.5')

        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)

        self.assertEqual(len(self.get_running_procs()), 1)

        self.await_ha_state('STEADY')
class ProcessDispatcherEEAgentIntTest(ProcessDispatcherServiceIntTest):
    """Run the basic int tests again, with a different environment

    Here the process dispatcher is started as a single app from
    ``pd_config`` and execution engine agents are spawned by the test
    itself, one per announced node.
    """

    def setUp(self):
        """Start the process dispatcher app and one eeagent for ``engine1``."""
        # Set first so tearDown() can always test it.
        self.dashi = None
        self._start_container()

        self.container_client = ContainerAgentClient(node=self.container.node,
                                                     name=self.container.name)
        self.container = self.container_client._get_container_instance()

        app = dict(name="process_dispatcher",
                   processapp=("process_dispatcher",
                               "ion.services.cei.process_dispatcher_service",
                               "ProcessDispatcherService"))
        self.container.start_app(app, config=pd_config)

        self.rr_cli = self.container.resource_registry

        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        self.process_definition = ProcessDefinition(name='test_process')
        self.process_definition.executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess'
        }
        self.process_definition_id = self.pd_cli.create_process_definition(
            self.process_definition)

        # Filled in by _start_eeagent(); consumed by tearDown().
        self._eea_pids = []
        self._tmpdirs = []

        self.dashi = get_dashi(uuid.uuid4().hex,
                               pd_config['processdispatcher']['dashi_uri'],
                               pd_config['processdispatcher']['dashi_exchange'])

        # send a fake node_state message to PD's dashi binding.
        self.node1_id = uuid.uuid4().hex
        self._send_node_state("engine1", self.node1_id)
        self._start_eeagent(self.node1_id)

        self.waiter = ProcessStateWaiter()

    def _send_node_state(self, engine_id, node_id=None):
        """Fire a ``node_state`` message reporting a RUNNING node.

        :param engine_id: engine whose domain id is put in the message
        :param node_id: node id to report; a random one when not given
        """
        node_id = node_id or uuid.uuid4().hex
        node_state = dict(node_id=node_id, state=InstanceState.RUNNING,
                          domain_id=domain_id_from_engine(engine_id))
        self.dashi.fire(get_pd_dashi_name(), "node_state", args=node_state)

    def _start_eeagent(self, node_id):
        """Spawn an execution engine agent for ``node_id``.

        The agent gets a fresh temporary persistence directory. The
        directory and the agent pid are recorded for tearDown().
        """
        persistence_dir = tempfile.mkdtemp()
        self._tmpdirs.append(persistence_dir)
        agent_config = _get_eeagent_config(node_id, persistence_dir)
        pid = self.container_client.spawn_process(
            name="eeagent",
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=agent_config)
        log.info('Agent pid=%s.', str(pid))
        self._eea_pids.append(pid)

    def tearDown(self):
        """Terminate the eeagents and release temp dirs, waiter and dashi.

        Each cleanup stage runs even if an earlier stage raised, so one
        failure does not leak the remaining resources. The first error
        still propagates.
        """
        try:
            for pid in self._eea_pids:
                self.container.terminate_process(pid)
        finally:
            try:
                for d in self._tmpdirs:
                    shutil.rmtree(d)
            finally:
                try:
                    self.waiter.stop()
                finally:
                    if self.dashi:
                        self.dashi.cancel()

    def test_requested_ee(self):

        # request non-default engine

        process_target = ProcessTarget(execution_engine_id="engine2")

        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.WAITING)

        # request unknown engine, with NEVER queuing mode. The request
        # should be rejected.
        # verifies L4-CI-CEI-RQ52

        process_target = ProcessTarget(execution_engine_id="not-a-real-ee")

        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        rejected_pid = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=rejected_pid)

        self.waiter.await_state_event(rejected_pid, ProcessStateEnum.REJECTED)

        # now add a node and eeagent for engine2. original process should leave
        # queue and start running
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine2", node2_id)
        self._start_eeagent(node2_id)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # spawn another process. it should start immediately.

        process_target = ProcessTarget(execution_engine_id="engine2")

        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        pid2 = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid2)

        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # one more with node exclusive

        process_target = ProcessTarget(execution_engine_id="engine2",
                                       node_exclusive="hats")

        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        pid3 = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid3)

        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # kill the processes for good
        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.TERMINATED)

    def test_node_exclusive(self):

        # the node_exclusive constraint is used to ensure multiple processes
        # of the same "kind" each get a VM exclusive of each other. Other
        # processes may run on these VMs, just not processes with the same
        # node_exclusive tag. Since we cannot directly query the contents
        # of each node in this test, we prove the capability by scheduling
        # processes one by one and checking their state.

        # verifies L4-CI-CEI-RQ121
        # verifies L4-CI-CEI-RQ57

        # first off, setUp() created a single node and eeagent.
        # We schedule two processes with the same "abc" node_exclusive
        # tag. Since there is only one node, the first process should run
        # and the second should be queued.

        process_target = ProcessTarget(execution_engine_id="engine1")
        process_target.node_exclusive = "abc"

        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        pid1 = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid1)

        self.waiter.await_state_event(pid1, ProcessStateEnum.RUNNING)

        pid2 = self.pd_cli.create_process(self.process_definition_id)
        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.WAITING)

        # now demonstrate that the node itself is not full by launching
        # a third process without a node_exclusive tag -- it should start
        # immediately

        process_target.node_exclusive = None
        pid3 = self.pd_cli.create_process(self.process_definition_id)
        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # finally, add a second node to the engine. pid2 should be started
        # since there is an exclusive "abc" node free.
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine1", node2_id)
        self._start_eeagent(node2_id)
        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # kill the processes for good
        self.pd_cli.cancel_process(pid1)
        self.waiter.await_state_event(pid1, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.TERMINATED)

    def test_code_download(self):
        # create a process definition that has no URL; only module and class.
        process_definition_no_url = ProcessDefinition(name='test_process_nodownload')
        process_definition_no_url.executable = {'module': 'ion.my.test.process',
                                                'class': 'TestProcess'}
        process_definition_id_no_url = self.pd_cli.create_process_definition(
            process_definition_no_url)

        # create another that has a URL of the python file (this very file)
        # verifies L4-CI-CEI-RQ114
        url = "file://%s" % os.path.join(os.path.dirname(__file__),
                                         'test_process_dispatcher.py')
        process_definition = ProcessDefinition(name='test_process_download')
        process_definition.executable = {'module': 'ion.my.test.process',
                                         'class': 'TestProcess', 'url': url}
        process_definition_id = self.pd_cli.create_process_definition(
            process_definition)

        process_target = ProcessTarget()
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        self.waiter.start()

        # Test a module with no download fails
        pid_no_url = self.pd_cli.create_process(process_definition_id_no_url)

        self.pd_cli.schedule_process(process_definition_id_no_url,
                                     process_schedule, process_id=pid_no_url)

        self.waiter.await_state_event(pid_no_url, ProcessStateEnum.FAILED)

        # Test a module with a URL runs
        pid = self.pd_cli.create_process(process_definition_id)

        self.pd_cli.schedule_process(process_definition_id,
                                     process_schedule, process_id=pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)
class ProcessDispatcherServiceIntTest(IonIntegrationTestCase):
    """Integration tests for the process dispatcher service, run against
    the ``r2cei`` deployment.
    """

    def setUp(self):
        """Deploy the CEI services and register the test process definition."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')

        self.rr_cli = ResourceRegistryServiceClient()
        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        definition = ProcessDefinition(name='test_process')
        definition.executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess',
        }
        self.process_definition = definition
        self.process_definition_id = self.pd_cli.create_process_definition(
            definition)

        # Each test starts the waiter itself.
        self.waiter = ProcessStateWaiter()

    def tearDown(self):
        """Stop the process state waiter."""
        self.waiter.stop()

    def test_create_schedule_cancel(self):
        schedule = ProcessSchedule()
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        first_pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start(first_pid)

        scheduled_pid = self.pd_cli.schedule_process(
            self.process_definition_id, schedule,
            configuration={}, process_id=first_pid)
        self.assertEqual(first_pid, scheduled_pid)

        # verifies L4-CI-CEI-RQ141 and L4-CI-CEI-RQ142
        self.waiter.await_state_event(first_pid, ProcessStateEnum.RUNNING)

        pd_view = self.pd_cli.read_process(first_pid)
        self.assertEqual(pd_view.process_id, first_pid)
        self.assertEqual(pd_view.process_configuration, {})
        self.assertEqual(pd_view.process_state, ProcessStateEnum.RUNNING)

        # make sure process is readable directly from RR (mirrored)
        # verifies L4-CI-CEI-RQ63
        # verifies L4-CI-CEI-RQ64
        rr_view = self.rr_cli.read(first_pid)
        self.assertEqual(rr_view.process_id, first_pid)

        # now try communicating with the process to make sure it is really running
        test_client = TestClient()
        for expected in range(1, 6):
            self.assertEqual(expected, test_client.count(timeout=10))

        # verifies L4-CI-CEI-RQ147

        # kill the process and start it again
        self.pd_cli.cancel_process(first_pid)
        self.waiter.await_state_event(first_pid, ProcessStateEnum.TERMINATED)
        self.waiter.stop()

        second_pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start(second_pid)

        scheduled_pid = self.pd_cli.schedule_process(
            self.process_definition_id, schedule,
            configuration={}, process_id=second_pid)
        self.assertEqual(second_pid, scheduled_pid)
        self.assertNotEqual(first_pid, second_pid)

        self.waiter.await_state_event(second_pid, ProcessStateEnum.RUNNING)

        # The count is expected to start from 1 again for the new process.
        for expected in range(1, 6):
            self.assertEqual(expected, test_client.count(timeout=10))

        # kill the process for good
        self.pd_cli.cancel_process(second_pid)
        self.waiter.await_state_event(second_pid, ProcessStateEnum.TERMINATED)

    def test_schedule_with_config(self):
        schedule = ProcessSchedule()
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        proc_id = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start(proc_id)

        # verifies L4-CI-CEI-RQ66

        # feed in a string that the process will return -- verifies that
        # configuration actually makes it to the instantiated process
        test_response = uuid.uuid4().hex
        configuration = {"test_response": test_response}

        scheduled_pid = self.pd_cli.schedule_process(
            self.process_definition_id, schedule,
            configuration=configuration, process_id=proc_id)
        self.assertEqual(proc_id, scheduled_pid)

        self.waiter.await_state_event(proc_id, ProcessStateEnum.RUNNING)

        test_client = TestClient()

        # verifies L4-CI-CEI-RQ139
        # assure that configuration block (which can contain inputs, outputs,
        # and arbitrary config) 1) makes it to the process and 2) is returned
        # in process queries

        self.assertEqual(test_client.query(), test_response)

        pd_view = self.pd_cli.read_process(proc_id)
        self.assertEqual(pd_view.process_id, proc_id)
        self.assertEqual(pd_view.process_configuration, configuration)

        # kill the process for good
        self.pd_cli.cancel_process(proc_id)
        self.waiter.await_state_event(proc_id, ProcessStateEnum.TERMINATED)

    def test_schedule_bad_config(self):
        schedule = ProcessSchedule()

        # a non-JSON-serializable IonObject
        unserializable = ProcessTarget()

        with self.assertRaises(BadRequest) as raised:
            self.pd_cli.schedule_process(self.process_definition_id,
                                         schedule,
                                         configuration={"bad": unserializable})
        error_text = raised.exception.message
        self.assertTrue(error_text.startswith("bad configuration"))

    def test_create_invalid_definition(self):
        # create process definition missing module and class
        # verifies L4-CI-CEI-RQ137
        url_only = dict(url="http://somewhere.com/something.py")
        definition = ProcessDefinition(name="test_process", executable=url_only)
        with self.assertRaises(BadRequest):
            self.pd_cli.create_process_definition(definition)
class ProcessDispatcherEEAgentIntTest(ProcessDispatcherServiceIntTest):
    """Run the basic int tests again, with a different environment.

    setUp() starts the Process Dispatcher app directly with ``pd_config``
    and spawns an execution engine agent (eeagent) in the test container.
    Nodes are announced to the PD by firing ``node_state`` messages over
    dashi.
    """

    def setUp(self):
        # tearDown() only cancels the dashi connection when one exists.
        self.dashi = None
        self._start_container()
        from pyon.public import CFG

        self.container_client = ContainerAgentClient(node=self.container.node,
            name=self.container.name)
        self.container = self.container_client._get_container_instance()

        app = dict(name="process_dispatcher", processapp=("process_dispatcher",
                   "ion.services.cei.process_dispatcher_service",
                   "ProcessDispatcherService"))
        self.container.start_app(app, config=pd_config)

        self.rr_cli = self.container.resource_registry

        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        self.process_definition = ProcessDefinition(name='test_process')
        self.process_definition.executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess'}
        self.process_definition_id = self.pd_cli.create_process_definition(
            self.process_definition)

        # bookkeeping for every eeagent spawned through _start_eeagent()
        self._eea_pids = []
        self._eea_pid_to_resource_id = {}
        self._eea_pid_to_persistence_dir = {}
        self._tmpdirs = []

        self.dashi = get_dashi(uuid.uuid4().hex,
            pd_config['processdispatcher']['dashi_uri'],
            pd_config['processdispatcher']['dashi_exchange'],
            sysname=CFG.get_safe("dashi.sysname"))

        # send a fake node_state message to PD's dashi binding.
        self.node1_id = uuid.uuid4().hex
        self._send_node_state("engine1", self.node1_id)
        self._initial_eea_pid = self._start_eeagent(self.node1_id)

        self.waiter = ProcessStateWaiter()

    def _send_node_state(self, engine_id, node_id=None):
        """Fire a RUNNING node_state message for a node of `engine_id`.

        A random node id is generated when `node_id` is not given.
        """
        node_id = node_id or uuid.uuid4().hex
        node_state = dict(node_id=node_id, state=InstanceState.RUNNING,
            domain_id=domain_id_from_engine(engine_id))
        self.dashi.fire(get_pd_dashi_name(), "node_state", args=node_state)

    def _start_eeagent(self, node_id, resource_id=None, persistence_dir=None):
        """Spawn an eeagent for `node_id` and return its pid.

        When no `persistence_dir` is passed a temporary directory is created
        and remembered so that tearDown() can remove it. `resource_id`
        defaults to a random hex string.
        """
        if not persistence_dir:
            persistence_dir = tempfile.mkdtemp()
            self._tmpdirs.append(persistence_dir)
        resource_id = resource_id or uuid.uuid4().hex
        agent_config = _get_eeagent_config(node_id, persistence_dir,
            resource_id=resource_id)
        pid = self.container_client.spawn_process(name="eeagent",
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent", config=agent_config)
        log.info('Agent pid=%s.', str(pid))
        self._eea_pids.append(pid)
        self._eea_pid_to_resource_id[pid] = resource_id
        self._eea_pid_to_persistence_dir[pid] = persistence_dir
        return pid

    def _kill_eeagent(self, pid):
        """Terminate an eeagent started by _start_eeagent() and forget it."""
        self.assertIn(pid, self._eea_pids)
        self.container.terminate_process(pid)
        self._eea_pids.remove(pid)
        del self._eea_pid_to_resource_id[pid]
        del self._eea_pid_to_persistence_dir[pid]

    def tearDown(self):
        # The waiter and the dashi connection must be released even when
        # killing an agent or removing a directory fails, hence the nested
        # try/finally.
        try:
            # iterate over a copy: _kill_eeagent() mutates self._eea_pids
            for pid in list(self._eea_pids):
                self._kill_eeagent(pid)
            for d in self._tmpdirs:
                shutil.rmtree(d)
        finally:
            try:
                self.waiter.stop()
            finally:
                if self.dashi:
                    self.dashi.cancel()

    def test_requested_ee(self):

        # request non-default engine

        process_target = ProcessTarget(execution_engine_id="engine2")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, process_id=pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.WAITING)

        # request unknown engine, with NEVER queuing mode. The request
        # should be rejected.
        # verifies L4-CI-CEI-RQ52

        process_target = ProcessTarget(execution_engine_id="not-a-real-ee")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        rejected_pid = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, process_id=rejected_pid)

        self.waiter.await_state_event(rejected_pid, ProcessStateEnum.REJECTED)

        # now add a node and eeagent for engine2. original process should leave
        # queue and start running
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine2", node2_id)
        self._start_eeagent(node2_id)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # spawn another process. it should start immediately.

        process_target = ProcessTarget(execution_engine_id="engine2")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        pid2 = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, process_id=pid2)

        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # one more with node exclusive

        process_target = ProcessTarget(execution_engine_id="engine2",
            node_exclusive="hats")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        pid3 = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, process_id=pid3)

        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # kill the processes for good
        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.TERMINATED)

    def test_node_exclusive(self):

        # the node_exclusive constraint is used to ensure multiple processes
        # of the same "kind" each get a VM exclusive of each other. Other
        # processes may run on these VMs, just not processes with the same
        # node_exclusive tag. Since we cannot directly query the contents
        # of each node in this test, we prove the capability by scheduling
        # processes one by one and checking their state.

        # verifies L4-CI-CEI-RQ121
        # verifies L4-CI-CEI-RQ57

        # first off, setUp() created a single node and eeagent.
        # We schedule two processes with the same "abc" node_exclusive
        # tag. Since there is only one node, the first process should run
        # and the second should be queued.

        process_target = ProcessTarget(execution_engine_id="engine1")
        process_target.node_exclusive = "abc"
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        pid1 = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, process_id=pid1)

        self.waiter.await_state_event(pid1, ProcessStateEnum.RUNNING)

        pid2 = self.pd_cli.create_process(self.process_definition_id)
        self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, process_id=pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.WAITING)

        # now demonstrate that the node itself is not full by launching
        # a third process without a node_exclusive tag -- it should start
        # immediately

        process_target.node_exclusive = None
        pid3 = self.pd_cli.create_process(self.process_definition_id)
        self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, process_id=pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # finally, add a second node to the engine. pid2 should be started
        # since there is an exclusive "abc" node free.
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine1", node2_id)
        self._start_eeagent(node2_id)
        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # kill the processes for good
        self.pd_cli.cancel_process(pid1)
        self.waiter.await_state_event(pid1, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.TERMINATED)

    def test_code_download(self):
        # create a process definition that has no URL; only module and class.
        process_definition_no_url = ProcessDefinition(name='test_process_nodownload')
        process_definition_no_url.executable = {'module': 'ion.my.test.process',
                                                'class': 'TestProcess'}
        process_definition_id_no_url = self.pd_cli.create_process_definition(
            process_definition_no_url)

        # create another that has a URL of the python file (this very file)
        # verifies L4-CI-CEI-RQ114
        url = "file://%s" % os.path.join(os.path.dirname(__file__), 'test_process_dispatcher.py')
        process_definition = ProcessDefinition(name='test_process_download')
        process_definition.executable = {'module': 'ion.my.test.process',
                                         'class': 'TestProcess', 'url': url}
        process_definition_id = self.pd_cli.create_process_definition(process_definition)

        process_target = ProcessTarget()
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        self.waiter.start()

        # Test a module with no download fails
        pid_no_url = self.pd_cli.create_process(process_definition_id_no_url)

        self.pd_cli.schedule_process(process_definition_id_no_url,
            process_schedule, process_id=pid_no_url)

        self.waiter.await_state_event(pid_no_url, ProcessStateEnum.FAILED)

        # Test a module with a URL runs
        pid = self.pd_cli.create_process(process_definition_id)

        self.pd_cli.schedule_process(process_definition_id,
            process_schedule, process_id=pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

    def _add_test_process(self, restart_mode=None):
        """Schedule one test process and return ``(pid, client)``.

        The process is configured with a unique listen name and the returned
        TestClient is addressed to that name. `restart_mode` is only set on
        the schedule when it is not None.
        """
        process_schedule = ProcessSchedule()
        if restart_mode is not None:
            process_schedule.restart_mode = restart_mode
        pid = self.pd_cli.create_process(self.process_definition_id)

        pid_listen_name = "PDtestproc_%s" % uuid.uuid4().hex
        config = {'process': {'listen_name': pid_listen_name}}

        self.pd_cli.schedule_process(self.process_definition_id, process_schedule,
            process_id=pid, configuration=config)

        client = TestClient(to_name=pid_listen_name)
        return pid, client

    def test_restart(self):
        self.waiter.start()

        restartable_pids = []
        nonrestartable_pids = []
        clients = {}

        # start 10 processes per restart mode, in this order: ALWAYS,
        # ABNORMAL, NEVER. The first two groups are expected to come back
        # after the eeagent restart below; the NEVER group is not.
        mode_buckets = (
            (ProcessRestartMode.ALWAYS, restartable_pids),
            (ProcessRestartMode.ABNORMAL, restartable_pids),
            (ProcessRestartMode.NEVER, nonrestartable_pids),
        )
        for restart_mode, bucket in mode_buckets:
            for _ in range(10):
                pid, client = self._add_test_process(restart_mode)
                bucket.append(pid)
                clients[pid] = client

        all_pids = restartable_pids + nonrestartable_pids

        self.waiter.await_many_state_events(all_pids, ProcessStateEnum.RUNNING)

        for pid in all_pids:
            client = clients[pid]
            self.assertFalse(client.is_restart())
            self.assertEqual(client.count(), 1)

        # now kill the whole eeagent and restart it. processes should
        # show up as FAILED in the next heartbeat.
        resource_id = self._eea_pid_to_resource_id[self._initial_eea_pid]
        persistence_dir = self._eea_pid_to_persistence_dir[self._initial_eea_pid]
        log.debug("Restarting eeagent %s", self._initial_eea_pid)
        self._kill_eeagent(self._initial_eea_pid)

        # manually kill the processes to simulate a real container failure
        for pid in all_pids:
            self.container.terminate_process(pid)

        # reuse the resource id and persistence dir of the killed agent
        self._start_eeagent(self.node1_id, resource_id=resource_id,
            persistence_dir=persistence_dir)

        # wait for restartables to restart
        self.waiter.await_many_state_events(restartable_pids, ProcessStateEnum.RUNNING)

        # query the processes again. it should have restart mode config
        for pid in restartable_pids:
            client = clients[pid]
            self.assertTrue(client.is_restart())
            self.assertEqual(client.count(), 1)

        # meanwhile some procs should not have restarted
        for pid in nonrestartable_pids:
            proc = self.pd_cli.read_process(pid)
            self.assertEqual(proc.process_state, ProcessStateEnum.FAILED)

        # guard against extraneous events we were receiving as part of a bug:
        # processes restarting again after they were already restarted
        self.waiter.await_nothing(timeout=5)

    def test_idempotency(self):
        # ensure every operation can be safely retried
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        proc_name = 'myreallygoodname'
        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start(pid)

        # note: if we import UNSCHEDULED state into ProcessStateEnum,
        # this assertion will need to change.
        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_state, ProcessStateEnum.REQUESTED)

        pid2 = self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, configuration={}, process_id=pid, name=proc_name)
        self.assertEqual(pid, pid2)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # repeating schedule is harmless
        pid2 = self.pd_cli.schedule_process(self.process_definition_id,
            process_schedule, configuration={}, process_id=pid, name=proc_name)
        self.assertEqual(pid, pid2)

        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_configuration, {})
        self.assertEqual(proc.process_state, ProcessStateEnum.RUNNING)

        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)

        # repeating cancel is harmless
        self.pd_cli.cancel_process(pid)
        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_configuration, {})
        self.assertEqual(proc.process_state, ProcessStateEnum.TERMINATED)
class HighAvailabilityAgentSensorPolicyTest(IonIntegrationTestCase):
    """Integration test for the HA agent running the 'sensor' policy.

    A small local HTTP server stands in for the metrics aggregator: the test
    sets the text it serves and checks that the HA agent scales the managed
    processes up and down in response.
    """

    def _start_webserver(self, port=None):
        """Start the fake aggregator webserver in a greenlet.

        Tries the suggested port first (8008 when none is given); whenever
        binding fails a random port between 8000 and 10000 is tried
        instead, for up to 100 attempts.

        Returns the port the server was actually bound to.
        Raises RuntimeError when no port could be bound.
        """

        def log_message(self, format, *args):
            # swallow log messages
            pass

        class TestRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
            server_version = "test_server"
            extensions_map = ""

            def do_GET(self):
                # every GET is answered with whatever text the test put on
                # the server object
                self.send_response(200)
                self.send_header("Content-type", "text/plain")
                self.send_header("Content-Length", len(self.server.response))
                self.end_headers()
                self.wfile.write(self.server.response)

        class Server(HTTPServer):

            response = ""

            def serve_forever(self):
                self._serving = 1
                while self._serving:
                    self.handle_request()

            def stop(self):
                self._serving = 0

        if port is None:
            port = 8008
        Handler = TestRequestHandler
        Handler.log_message = log_message

        # _stop_webserver() checks this attribute, so make sure it exists
        # even when no server could be created.
        self._webserver = None
        for _ in range(100):
            try:
                self._webserver = Server(("localhost", port), Handler)
            except socket.error:
                print("port %s is in use, picking another" % port)
                port = randint(8000, 10000)
            else:
                break
        else:
            raise RuntimeError("could not find a free port for the test webserver")

        self._web_glet = gevent.spawn(self._webserver.serve_forever)
        return port

    def _stop_webserver(self):
        """Stop the fake aggregator, if one was started."""
        webserver = getattr(self, "_webserver", None)
        if webserver is not None:
            # stop() only clears the serving flag; after a short grace
            # period the serving greenlet is killed outright.
            webserver.stop()
            gevent.sleep(2)
            self._web_glet.kill()

    @needs_epu
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2cei.yml")
        self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

        self.process_definition_id = uuid4().hex
        self.process_definition = ProcessDefinition(
            name="test",
            executable={"module": "ion.agents.cei.test.test_haagent", "class": "TestProcess"}
        )
        self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id)

        # 8919 is only a suggestion; _start_webserver() may pick another port
        http_port = self._start_webserver(port=8919)

        self.resource_id = "haagent_4567"
        self._haa_name = "high_availability_agent"
        self._haa_config = {
            "highavailability": {
                "policy": {
                    "interval": 1,
                    "name": "sensor",
                    "parameters": {
                        "metric": "app_attributes:ml",
                        "sample_period": 600,
                        "sample_function": "Average",
                        "cooldown_period": 20,
                        "scale_up_threshold": 2.0,
                        "scale_up_n_processes": 1,
                        "scale_down_threshold": 1.0,
                        "scale_down_n_processes": 1,
                        "maximum_processes": 5,
                        "minimum_processes": 1,
                    },
                },
                "aggregator": {
                    "type": "trafficsentinel",
                    "host": "localhost",
                    "port": http_port,
                    "protocol": "http",
                    "username": "******",
                    "password": "******",
                },
                "process_definition_id": self.process_definition_id,
                "process_dispatchers": ["process_dispatcher"],
            },
            "agent": {"resource_id": self.resource_id},
        }

        # processes that exist before the HA agent starts; excluded by
        # get_running_procs()
        self._base_procs = self.pd_cli.list_processes()

        self.waiter = ProcessStateWaiter()
        self.waiter.start()

        self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name)
        self._haa_pid = self.container_client.spawn_process(
            name=self._haa_name,
            module="ion.agents.cei.high_availability_agent",
            cls="HighAvailabilityAgent",
            config=self._haa_config,
        )

        # Start a resource agent client to talk with the HA agent.
        self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess())
        log.info("Got haa client %s.", str(self._haa_pyon_client))

        self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)

    def tearDown(self):
        self.waiter.stop()
        self.container.terminate_process(self._haa_pid)
        self._stop_webserver()
        self._stop_container()

    def get_running_procs(self):
        """returns a normalized set of running procs (removes the ones that
        were there at setup time)
        """
        base_pids = [proc.process_id for proc in self._base_procs]
        current = self.pd_cli.list_processes()
        current_pids = [proc.process_id for proc in current]
        print("filtering base procs %s from %s" % (base_pids, current_pids))
        return [
            cproc
            for cproc in current
            if cproc.process_id not in base_pids and cproc.process_state == ProcessStateEnum.RUNNING
        ]

    def _get_managed_upids(self):
        """Return the 'managed_upids' entry of the HA agent's dump."""
        result = self.haa_client.dump().result
        return result["managed_upids"]

    def _set_response(self, response):
        """Set the text the fake aggregator serves for every GET."""
        self._webserver.response = response

    def _report_ml(self, ml):
        """Serve one ``<upid>,ml=<ml>`` line per process managed by the HA agent."""
        upids = self._get_managed_upids()
        self._set_response("".join("%s,ml=%s\n" % (upid, ml) for upid in upids))

    def _await_steady(self, attempts=5):
        """Poll the HA agent once a second until it reports STEADY.

        Fails the test when STEADY is not seen within `attempts` polls.
        """
        for _ in range(attempts):
            if self.haa_client.status().result == "STEADY":
                return
            gevent.sleep(1)
        self.fail("HA Service took too long to get to state STEADY")

    def test_sensor_policy(self):
        status = self.haa_client.status().result
        # Ensure HA hasn't already failed
        self.assertIn(status, ("PENDING", "READY", "STEADY"))

        # minimum_processes is 1, so one process should come up on its own
        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)
        self.assertEqual(len(self.get_running_procs()), 1)

        self._await_steady()

        # Set ml for each proc such that we scale up
        self._report_ml("5")

        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)
        self.assertEqual(len(self.get_running_procs()), 2)

        # Set ml so we stay steady
        self._report_ml("1.5")

        self.assertEqual(len(self.get_running_procs()), 2)

        self._await_steady()

        # Set ml so we scale down
        self._report_ml("0.5")

        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)
        self.assertEqual(len(self.get_running_procs()), 1)

        self._await_steady()
class HighAvailabilityAgentTest(IonIntegrationTestCase):
    """Integration tests for the HA agent running the 'npreserving' policy.

    setUp() spawns the agent with ``preserve_n`` set to 0; the tests then
    reconfigure the policy and watch processes start and terminate.
    """

    @needs_epu
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')
        #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)
        self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

        self.process_definition_id = uuid4().hex
        self.process_definition_name = 'test'
        self.process_definition = ProcessDefinition(
            name=self.process_definition_name,
            executable={
                'module': 'ion.agents.cei.test.test_haagent',
                'class': 'TestProcess'
            })
        self.pd_cli.create_process_definition(self.process_definition,
            self.process_definition_id)

        self.resource_id = "haagent_1234"
        self._haa_name = "high_availability_agent"
        self._haa_dashi_name = "dashi_haa_" + uuid4().hex
        self._haa_dashi_uri = get_dashi_uri_from_cfg()
        self._haa_dashi_exchange = "%s.hatests" % bootstrap.get_sys_name()
        self._haa_config = {
            'highavailability': {
                'policy': {
                    'interval': 1,
                    'name': 'npreserving',
                    'parameters': {
                        'preserve_n': 0
                    }
                },
                'process_definition_id': self.process_definition_id,
                'dashi_messaging': True,
                'dashi_exchange': self._haa_dashi_exchange,
                'dashi_name': self._haa_dashi_name
            },
            'agent': {'resource_id': self.resource_id},
        }

        # services and processes that exist before the HA agent starts;
        # filtered out by get_new_services() / get_running_procs()
        self._base_services, _ = self.container.resource_registry.find_resources(
            restype="Service", name=self.process_definition_name)

        self._base_procs = self.pd_cli.list_processes()

        self.waiter = ProcessStateWaiter()
        self.waiter.start()

        self.container_client = ContainerAgentClient(node=self.container.node,
            name=self.container.name)
        self._haa_pid = self.container_client.spawn_process(name=self._haa_name,
            module="ion.agents.cei.high_availability_agent",
            cls="HighAvailabilityAgent", config=self._haa_config)

        # Start a resource agent client to talk with the HA agent.
        self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id,
            process=FakeProcess())
        log.info('Got haa client %s.', str(self._haa_pyon_client))

        self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)

    def tearDown(self):
        self.waiter.stop()
        try:
            self.container.terminate_process(self._haa_pid)
        except BadRequest:
            log.warning("Couldn't terminate HA Agent in teardown (May have been terminated by a test)")
        self._stop_container()

    def get_running_procs(self):
        """returns a normalized set of running procs (removes the ones that
        were there at setup time)
        """
        base_pids = [proc.process_id for proc in self._base_procs]
        current = self.pd_cli.list_processes()
        current_pids = [proc.process_id for proc in current]
        print("filtering base procs %s from %s" % (base_pids, current_pids))
        return [cproc for cproc in current
                if cproc.process_id not in base_pids
                and cproc.process_state == ProcessStateEnum.RUNNING]

    def get_new_services(self):
        """Return the registered Service resources whose name was not
        already present at setup time.
        """
        base_names = [i.name for i in self._base_services]
        services_registered, _ = self.container.resource_registry.find_resources(
            restype="Service", name=self.process_definition_name)
        return [cserv for cserv in services_registered
                if cserv.name not in base_names]

    def await_ha_state(self, want_state, timeout=10):
        """Poll the HA agent status once a second until it is `want_state`.

        Raises Exception after `timeout` unsuccessful polls.
        """
        for _ in range(0, timeout):
            status = self.haa_client.status().result
            if status == want_state:
                return
            gevent.sleep(1)

        raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state))

    def test_features(self):
        status = self.haa_client.status().result
        # Ensure HA hasn't already failed
        self.assertIn(status, ('PENDING', 'READY', 'STEADY'))

        # verifies L4-CI-CEI-RQ44
        # Note: the HA agent is started in the setUp() method, with config
        # pointing to the test "service". The initial config is set to preserve
        # 0 service processes. With this reconfigure step below, we change that
        # to launch 1.

        new_policy = {'preserve_n': 1}
        self.haa_client.reconfigure_policy(new_policy)

        result = self.haa_client.dump().result
        self.assertEqual(result['policy'], new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)

        self.assertEqual(len(self.get_running_procs()), 1)

        # give the agent up to five one-second polls to settle
        for _ in range(0, 5):
            if self.haa_client.status().result == 'STEADY':
                break
            gevent.sleep(1)
        else:
            self.fail("HA Service took too long to get to state STEADY")

        # verifies L4-CI-CEI-RQ122 and L4-CI-CEI-RQ124

        new_policy = {'preserve_n': 2}
        self.haa_client.reconfigure_policy(new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)

        self.assertEqual(len(self.get_running_procs()), 2)

        new_policy = {'preserve_n': 1}
        self.haa_client.reconfigure_policy(new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)

        self.assertEqual(len(self.get_running_procs()), 1)

        new_policy = {'preserve_n': 0}
        self.haa_client.reconfigure_policy(new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)
        self.assertEqual(len(self.get_running_procs()), 0)

    def test_associations(self):

        # Ensure that once the HA Agent starts, there is a Service object in
        # the registry
        result = self.haa_client.dump().result
        service_id = result.get('service_id')
        self.assertIsNotNone(service_id)
        service = self.container.resource_registry.read(service_id)
        self.assertIsNotNone(service)

        # Ensure that once a process is started, there is an association between
        # it and the service
        new_policy = {'preserve_n': 1}
        self.haa_client.reconfigure_policy(new_policy)
        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)
        self.assertEqual(len(self.get_running_procs()), 1)

        self.await_ha_state('STEADY')

        proc = self.get_running_procs()[0]

        processes_associated, _ = self.container.resource_registry.find_resources(
            restype="Process", name=proc.process_id)
        self.assertEqual(len(processes_associated), 1)

        has_processes = self.container.resource_registry.find_associations(
            service, "hasProcess")
        self.assertEqual(len(has_processes), 1)

        self.await_ha_state('STEADY')

        # Ensure that once we terminate that process, there are no associations
        new_policy = {'preserve_n': 0}
        self.haa_client.reconfigure_policy(new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)
        self.assertEqual(len(self.get_running_procs()), 0)

        processes_associated, _ = self.container.resource_registry.find_resources(
            restype="Process", name=proc.process_id)
        self.assertEqual(len(processes_associated), 0)

        has_processes = self.container.resource_registry.find_associations(
            service, "hasProcess")
        self.assertEqual(len(has_processes), 0)

        # Ensure that once we terminate that HA Agent, the Service object is
        # cleaned up
        self.container.terminate_process(self._haa_pid)

        with self.assertRaises(NotFound):
            self.container.resource_registry.read(service_id)

    def test_dashi(self):
        # exercise the agent through its dashi interface instead of pyon
        import dashi

        dashi_conn = dashi.DashiConnection("something", self._haa_dashi_uri,
            self._haa_dashi_exchange)

        status = dashi_conn.call(self._haa_dashi_name, "status")
        self.assertIn(status, ('PENDING', 'READY', 'STEADY'))

        new_policy = {'preserve_n': 0}
        dashi_conn.call(self._haa_dashi_name, "reconfigure_policy",
            new_policy=new_policy)
def setUp(self):
    """Start the container, a fake aggregator webserver and a sensor-policy
    HA agent, then create the clients used to talk to the agent.
    """
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2cei.yml')
    self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

    # register the process definition the HA agent will manage
    self.process_definition_id = uuid4().hex
    test_executable = {
        'module': 'ion.agents.cei.test.test_haagent',
        'class': 'TestProcess'
    }
    self.process_definition = ProcessDefinition(name='test',
        executable=test_executable)
    self.pd_cli.create_process_definition(self.process_definition,
        self.process_definition_id)

    # 8919 is the suggested port; the webserver returns the port it
    # actually got
    http_port = self._start_webserver(port=8919)

    self.resource_id = "haagent_4567"
    self._haa_name = "high_availability_agent"

    sensor_parameters = {
        'metric': 'app_attributes:ml',
        'sample_period': 600,
        'sample_function': 'Average',
        'cooldown_period': 20,
        'scale_up_threshold': 2.0,
        'scale_up_n_processes': 1,
        'scale_down_threshold': 1.0,
        'scale_down_n_processes': 1,
        'maximum_processes': 5,
        'minimum_processes': 1,
    }
    sensor_policy = {
        'interval': 1,
        'name': 'sensor',
        'parameters': sensor_parameters
    }
    aggregator = {
        'type': 'trafficsentinel',
        'host': 'localhost',
        'port': http_port,
        'protocol': 'http',
        'username': '******',
        'password': '******'
    }
    highavailability = {
        'policy': sensor_policy,
        'aggregator': aggregator,
        'process_definition_id': self.process_definition_id,
        "process_dispatchers": ['process_dispatcher']
    }
    self._haa_config = {
        'highavailability': highavailability,
        'agent': {'resource_id': self.resource_id},
    }

    # snapshot of the processes present before the HA agent is spawned
    self._base_procs = self.pd_cli.list_processes()

    self.waiter = ProcessStateWaiter()
    self.waiter.start()

    self.container_client = ContainerAgentClient(
        node=self.container.node,
        name=self.container.name)
    self._haa_pid = self.container_client.spawn_process(
        name=self._haa_name,
        module="ion.agents.cei.high_availability_agent",
        cls="HighAvailabilityAgent",
        config=self._haa_config)

    # resource agent client wrapped by the HA agent client
    pyon_client = SimpleResourceAgentClient(self.resource_id,
        process=FakeProcess())
    self._haa_pyon_client = pyon_client
    log.info('Got haa client %s.', str(self._haa_pyon_client))

    self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
class BaseHighAvailabilityAgentTest(IonIntegrationTestCase):
    """Shared fixture for HA agent integration tests.

    setUp() registers a uniquely named process definition and a matching
    ServiceDefinition, spawns an HA agent with the 'npreserving' policy
    (``preserve_n`` 0) and creates the clients used to drive it. The helper
    methods poll the agent and the resource registry.
    """

    @needs_epu
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')
        #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)
        self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

        self.process_definition_id = uuid4().hex
        self.process_definition_name = 'test_haagent_%s' % self.process_definition_id
        self.process_definition = ProcessDefinition(
            name=self.process_definition_name,
            executable={
                'module': 'ion.agents.cei.test.test_haagent',
                'class': 'TestProcess'
            })
        self.pd_cli.create_process_definition(self.process_definition,
            self.process_definition_id)

        # the service definition shares its name with the process definition
        service_definition = SERVICE_DEFINITION_TMPL % self.process_definition_name
        sd = IonObject(RT.ServiceDefinition, {
            "name": self.process_definition_name,
            "definition": service_definition
        })
        self.service_def_id, _ = self.container.resource_registry.create(sd)

        self.resource_id = "haagent_1234"
        self._haa_name = "high_availability_agent"
        self._haa_dashi_name = "dashi_haa_" + uuid4().hex
        self._haa_dashi_uri = get_dashi_uri_from_cfg()
        self._haa_dashi_exchange = "hatests"
        self._haa_config = self._get_haagent_config()

        # services and processes that exist before the HA agent starts;
        # filtered out by get_new_services() / get_running_procs()
        self._base_services, _ = self.container.resource_registry.find_resources(
            restype="Service", name=self.process_definition_name)

        self._base_procs = self.pd_cli.list_processes()

        self.waiter = ProcessStateWaiter()
        self.waiter.start()

        self.container_client = ContainerAgentClient(node=self.container.node,
            name=self.container.name)

        self._spawn_haagent()

        self._setup_haa_client()

    def _get_haagent_config(self):
        """Return the HA agent config: npreserving policy, preserve_n 0,
        dashi messaging enabled.
        """
        return {
            'highavailability': {
                'policy': {
                    'interval': 1,
                    'name': 'npreserving',
                    'parameters': {
                        'preserve_n': 0
                    }
                },
                'process_definition_id': self.process_definition_id,
                'dashi_messaging': True,
                'dashi_exchange': self._haa_dashi_exchange,
                'dashi_name': self._haa_dashi_name
            },
            'agent': {
                'resource_id': self.resource_id
            },
        }

    def _setup_haa_client(self):
        """Create the pyon resource agent client and the HA agent client
        wrapping it.
        """
        self._haa_pyon_client = SimpleResourceAgentClient(
            self.resource_id, process=FakeProcess())

        self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)

    def _spawn_haagent(self, policy_parameters=None):
        """Spawn the HA agent and remember its pid in ``self._haa_pid``.

        `policy_parameters`, when given, replaces the policy parameters in
        a deep copy of the base config; ``self._haa_config`` is not touched.
        """
        config = deepcopy(self._haa_config)
        if policy_parameters is not None:
            config['highavailability']['policy']['parameters'] = policy_parameters
        self._haa_pid = self.container_client.spawn_process(
            name=self._haa_name,
            module="ion.agents.cei.high_availability_agent",
            cls="HighAvailabilityAgent",
            config=config)

    def _kill_haagent(self):
        """Terminate the HA agent process spawned by _spawn_haagent()."""
        self.container.terminate_process(self._haa_pid)

    def tearDown(self):
        # scale back down to zero processes before shutting anything down
        new_policy = {'preserve_n': 0}
        self.haa_client.reconfigure_policy(new_policy)

        self.assertEqual(len(self.get_running_procs()), 0)
        self.await_ha_state('STEADY')

        self.waiter.stop()
        try:
            self._kill_haagent()
        except BadRequest:
            log.warning("Couldn't terminate HA Agent in teardown (May have been terminated by a test)")
        self.container.resource_registry.delete(self.service_def_id,
            del_associations=True)
        self._stop_container()

    def get_running_procs(self):
        """returns a normalized set of running procs (removes the ones that
        were there at setup time)
        """
        base_pids = [proc.process_id for proc in self._base_procs]
        current = self.pd_cli.list_processes()
        current_pids = [proc.process_id for proc in current]
        print("filtering base procs %s from %s" % (base_pids, current_pids))
        return [
            cproc for cproc in current
            if cproc.process_id not in base_pids
            and cproc.process_state == ProcessStateEnum.RUNNING
        ]

    def get_new_services(self):
        """Return the registered Service resources whose name was not
        already present at setup time.
        """
        base_names = [i.name for i in self._base_services]
        services_registered, _ = self.container.resource_registry.find_resources(
            restype="Service", name=self.process_definition_name)
        return [
            cserv for cserv in services_registered
            if cserv.name not in base_names
        ]

    def await_ha_state(self, want_state, timeout=20):
        """Poll the HA agent status until it equals `want_state`.

        Makes up to `timeout` polls, sleeping one second after each
        unsuccessful one. Errors while polling are logged and retried.

        Raises Exception when the state was not reached.
        """
        for _ in range(0, timeout):
            try:
                status = self.haa_client.status().result
                if status == want_state:
                    return
                num_procs = len(self.get_running_procs())
                log.debug("assert wants state %s, got state %s, with %s procs",
                          want_state, status, num_procs)
            except Exception:
                log.exception("Problem getting HA status, trying again...")
            gevent.sleep(1)

        raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state))

    def await_pyon_ha_state(self, want_state, timeout=20):
        """Poll the Service resource of the HA agent until its ``state``
        equals `want_state`.

        The service id is taken from the agent's dump on every poll. Makes
        up to `timeout` polls, sleeping one second after each unsuccessful
        one. Errors while polling are logged and retried.

        Raises Exception when the state was not reached.
        """
        for _ in range(0, timeout):
            try:
                result = self.haa_client.dump().result
                service_id = result.get('service_id')
                service = self.container.resource_registry.read(service_id)

                if service.state == want_state:
                    return
                # pass the values as logging arguments; applying % to the
                # return value of log.debug() raises TypeError
                log.debug("want state %s, got state %s",
                          want_state, service.state)
            except Exception:
                log.exception("Problem getting HA status, trying again...")
            gevent.sleep(1)

        raise Exception("Took more than %s to get to pyon ha state %s" % (timeout, want_state))
def setUp(self):
    """Start the CEI deployment and spawn an HA agent using the 'sensor' policy.

    Registers a test process definition, starts the local test webserver
    that the agent's 'trafficsentinel' server config points at, spawns the
    HighAvailabilityAgent in the container and builds the client used by
    the tests to talk to it.
    """
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2cei.yml')

    self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

    # Register the process definition the HA agent is configured with.
    self.process_definition_id = uuid4().hex
    test_executable = {
        'module': 'ion.agents.cei.test.test_haagent',
        'class': 'TestProcess',
    }
    self.process_definition = ProcessDefinition(name='test',
                                                executable=test_executable)
    self.pd_cli.create_process_definition(self.process_definition,
                                          self.process_definition_id)

    # 8919 is only the requested port; the port actually returned by
    # _start_webserver is the one written into the agent config.
    http_port = self._start_webserver(port=8919)

    self.resource_id = "haagent_4567"
    self._haa_name = "high_availability_agent"

    trafficsentinel_cfg = {
        'host': 'localhost',
        'port': http_port,
        'protocol': 'http',
        'username': '******',
        'password': '******',
    }
    sensor_parameters = {
        'metric': 'app_attributes:ml',
        'sample_period': 600,
        'sample_function': 'Average',
        'cooldown_period': 5,
        'scale_up_threshold': 2.0,
        'scale_up_n_processes': 1,
        'scale_down_threshold': 1.0,
        'scale_down_n_processes': 1,
        'maximum_processes': 5,
        'minimum_processes': 1,
    }
    highavailability_cfg = {
        'policy': {
            'interval': 1,
            'name': 'sensor',
            'parameters': sensor_parameters,
        },
        'process_definition_id': self.process_definition_id,
        "process_dispatchers": ['process_dispatcher'],
    }
    self._haa_config = {
        'server': {'trafficsentinel': trafficsentinel_cfg},
        'highavailability': highavailability_cfg,
        'agent': {'resource_id': self.resource_id},
    }

    # Snapshot of the processes that exist before the agent is spawned.
    self._base_procs = self.pd_cli.list_processes()

    self.waiter = ProcessStateWaiter()
    self.waiter.start()

    self.container_client = ContainerAgentClient(
        node=self.container.node,
        name=self.container.name)
    self._haa_pid = self.container_client.spawn_process(
        name=self._haa_name,
        module="ion.agents.cei.high_availability_agent",
        cls="HighAvailabilityAgent",
        config=self._haa_config)

    # Resource agent client addressed by the same resource_id that was
    # placed in the agent config above.
    self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id,
                                                      process=FakeProcess())
    log.info('Got haa client %s.', str(self._haa_pyon_client))
    self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
class HighAvailabilityAgentTest(IonIntegrationTestCase):
    """Integration tests for the HA agent running the 'npreserving' policy.

    setUp() spawns the agent with ``preserve_n`` set to 0; the tests then
    reconfigure the policy and check the resulting processes, agent status
    and resource registry contents.
    """

    @needs_epu
    def setUp(self):
        """Start the CEI deployment and spawn the HA agent under test."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')
        self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

        # Register the process definition the HA agent will manage.
        self.process_definition_id = uuid4().hex
        self.process_definition_name = 'test'
        self.process_definition = ProcessDefinition(
            name=self.process_definition_name,
            executable={
                'module': 'ion.agents.cei.test.test_haagent',
                'class': 'TestProcess'
            })
        self.pd_cli.create_process_definition(self.process_definition,
                                              self.process_definition_id)

        self.resource_id = "haagent_1234"
        self._haa_name = "high_availability_agent"
        self._haa_dashi_name = "dashi_haa_" + uuid4().hex
        self._haa_dashi_uri = get_dashi_uri_from_cfg()
        self._haa_dashi_exchange = "%s.hatests" % bootstrap.get_sys_name()
        self._haa_config = {
            'highavailability': {
                'policy': {
                    'interval': 1,
                    'name': 'npreserving',
                    'parameters': {
                        'preserve_n': 0
                    }
                },
                'process_definition_id': self.process_definition_id,
                'dashi_messaging': True,
                'dashi_exchange': self._haa_dashi_exchange,
                'dashi_name': self._haa_dashi_name
            },
            'agent': {'resource_id': self.resource_id},
        }

        # Baselines recorded before the agent starts, used by
        # get_new_services() and get_running_procs() to filter results.
        self._base_services, _ = self.container.resource_registry.find_resources(
            restype="Service", name=self.process_definition_name)
        self._base_procs = self.pd_cli.list_processes()

        self.waiter = ProcessStateWaiter()
        self.waiter.start()

        self.container_client = ContainerAgentClient(node=self.container.node,
                                                     name=self.container.name)
        self._haa_pid = self.container_client.spawn_process(
            name=self._haa_name,
            module="ion.agents.cei.high_availability_agent",
            cls="HighAvailabilityAgent",
            config=self._haa_config)

        # Resource agent client addressed by the resource_id given to the
        # HA agent in its config.
        self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id,
                                                          process=FakeProcess())
        log.info('Got haa client %s.', str(self._haa_pyon_client))
        self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)

    def tearDown(self):
        """Stop the state waiter, terminate the HA agent and stop the container."""
        self.waiter.stop()
        try:
            self.container.terminate_process(self._haa_pid)
        except BadRequest:
            # test_associations terminates the agent itself.
            log.warning("Couldn't terminate HA Agent in teardown (May have been terminated by a test)")
        self._stop_container()

    def get_running_procs(self):
        """returns a normalized set of running procs (removes the ones that
        were there at setup time)
        """
        base = self._base_procs
        base_pids = [proc.process_id for proc in base]
        current = self.pd_cli.list_processes()
        current_pids = [proc.process_id for proc in current]
        # Single parenthesised argument: prints the same text as the print
        # statement form.
        print("filtering base procs %s from %s" % (base_pids, current_pids))
        normal = [cproc for cproc in current
                  if cproc.process_id not in base_pids
                  and cproc.process_state == ProcessStateEnum.RUNNING]
        return normal

    def get_new_services(self):
        """Return Service resources named like the test process definition
        whose names were not present in the baseline taken in setUp().
        """
        base = self._base_services
        base_names = [i.name for i in base]
        services_registered, _ = self.container.resource_registry.find_resources(
            restype="Service", name=self.process_definition_name)
        normal = [cserv for cserv in services_registered
                  if cserv.name not in base_names]
        return normal

    def await_ha_state(self, want_state, timeout=10):
        """Poll the HA agent status until it equals ``want_state``.

        Polls up to ``timeout`` times, sleeping one second after each
        non-matching poll, and raises ``Exception`` if the state is never
        observed.
        """
        for i in range(0, timeout):
            status = self.haa_client.status().result
            if status == want_state:
                return
            gevent.sleep(1)

        raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state))

    def test_features(self):
        """Reconfigure preserve_n through 1, 2, 1, 0 and check process counts."""
        status = self.haa_client.status().result
        # Ensure HA hasn't already failed
        self.assertIn(status, ('PENDING', 'READY', 'STEADY'))

        # verifies L4-CI-CEI-RQ44
        # Note: the HA agent is started in the setUp() method, with config
        # pointing to the test "service". The initial config is set to preserve
        # 0 service processes. With this reconfigure step below, we change that
        # to launch 1.
        new_policy = {'preserve_n': 1}
        self.haa_client.reconfigure_policy(new_policy)

        result = self.haa_client.dump().result
        self.assertEqual(result['policy'], new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)

        self.assertEqual(len(self.get_running_procs()), 1)

        # Poll for STEADY. A plain comparison is used instead of wrapping an
        # assertion in a bare ``except:`` so that unrelated exceptions are
        # never swallowed by the retry loop.
        for i in range(0, 5):
            status = self.haa_client.status().result
            if status == 'STEADY':
                break
            gevent.sleep(1)
        else:
            self.fail("HA Service took too long to get to state STEADY")

        # Ensure Service object has the correct state
        result = self.haa_client.dump().result
        service_id = result.get('service_id')
        service = self.container.resource_registry.read(service_id)
        self.assertEqual(service.state, ServiceStateEnum.STEADY)

        # verifies L4-CI-CEI-RQ122 and L4-CI-CEI-RQ124
        new_policy = {'preserve_n': 2}
        self.haa_client.reconfigure_policy(new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)

        self.assertEqual(len(self.get_running_procs()), 2)

        new_policy = {'preserve_n': 1}
        self.haa_client.reconfigure_policy(new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)

        self.assertEqual(len(self.get_running_procs()), 1)

        new_policy = {'preserve_n': 0}
        self.haa_client.reconfigure_policy(new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)
        self.assertEqual(len(self.get_running_procs()), 0)

    def test_associations(self):
        """Check the Service/Process resources and associations the agent keeps."""
        # Ensure that once the HA Agent starts, there is a Service object in
        # the registry
        result = self.haa_client.dump().result
        service_id = result.get('service_id')
        self.assertIsNotNone(service_id)
        service = self.container.resource_registry.read(service_id)
        self.assertIsNotNone(service)

        # Ensure that once a process is started, there is an association between
        # it and the service
        new_policy = {'preserve_n': 1}
        self.haa_client.reconfigure_policy(new_policy)
        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)
        self.assertEqual(len(self.get_running_procs()), 1)

        self.await_ha_state('STEADY')

        proc = self.get_running_procs()[0]

        processes_associated, _ = self.container.resource_registry.find_resources(
            restype="Process", name=proc.process_id)
        self.assertEqual(len(processes_associated), 1)

        has_processes = self.container.resource_registry.find_associations(
            service, "hasProcess")
        self.assertEqual(len(has_processes), 1)

        self.await_ha_state('STEADY')

        # Ensure that once we terminate that process, there are no associations
        new_policy = {'preserve_n': 0}
        self.haa_client.reconfigure_policy(new_policy)

        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)
        self.assertEqual(len(self.get_running_procs()), 0)

        processes_associated, _ = self.container.resource_registry.find_resources(
            restype="Process", name=proc.process_id)
        self.assertEqual(len(processes_associated), 0)

        has_processes = self.container.resource_registry.find_associations(
            service, "hasProcess")
        self.assertEqual(len(has_processes), 0)

        # Ensure that once we terminate that HA Agent, the Service object is
        # cleaned up
        self.container.terminate_process(self._haa_pid)

        with self.assertRaises(NotFound):
            self.container.resource_registry.read(service_id)

    def test_dashi(self):
        """Call the agent's status and reconfigure_policy operations over dashi."""
        import dashi

        dashi_conn = dashi.DashiConnection("something", self._haa_dashi_uri,
                                           self._haa_dashi_exchange)

        status = dashi_conn.call(self._haa_dashi_name, "status")
        self.assertIn(status, ('PENDING', 'READY', 'STEADY'))

        new_policy = {'preserve_n': 0}
        dashi_conn.call(self._haa_dashi_name, "reconfigure_policy",
                        new_policy=new_policy)
def setUp(self):
    """Start the CEI deployment and spawn an HA agent using the 'sensor' policy.

    The agent config carries a 'trafficsentinel' aggregator section that
    points at the local test webserver started here; the tests talk to the
    spawned agent through ``self.haa_client``.
    """
    self._start_container()
    self.container.start_rel_from_url("res/deploy/r2cei.yml")

    self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

    # Register the process definition the HA agent is configured with.
    self.process_definition_id = uuid4().hex
    executable = {
        "module": "ion.agents.cei.test.test_haagent",
        "class": "TestProcess",
    }
    self.process_definition = ProcessDefinition(name="test", executable=executable)
    self.pd_cli.create_process_definition(self.process_definition,
                                          self.process_definition_id)

    # 8919 is only the requested port; the port actually returned by
    # _start_webserver is the one written into the aggregator config.
    http_port = self._start_webserver(port=8919)

    self.resource_id = "haagent_4567"
    self._haa_name = "high_availability_agent"

    policy_cfg = {
        "interval": 1,
        "name": "sensor",
        "parameters": {
            "metric": "app_attributes:ml",
            "sample_period": 600,
            "sample_function": "Average",
            "cooldown_period": 20,
            "scale_up_threshold": 2.0,
            "scale_up_n_processes": 1,
            "scale_down_threshold": 1.0,
            "scale_down_n_processes": 1,
            "maximum_processes": 5,
            "minimum_processes": 1,
        },
    }
    aggregator_cfg = {
        "type": "trafficsentinel",
        "host": "localhost",
        "port": http_port,
        "protocol": "http",
        "username": "******",
        "password": "******",
    }
    self._haa_config = {
        "highavailability": {
            "policy": policy_cfg,
            "aggregator": aggregator_cfg,
            "process_definition_id": self.process_definition_id,
            "process_dispatchers": ["process_dispatcher"],
        },
        "agent": {"resource_id": self.resource_id},
    }

    # Snapshot of the processes that exist before the agent is spawned.
    self._base_procs = self.pd_cli.list_processes()

    self.waiter = ProcessStateWaiter()
    self.waiter.start()

    self.container_client = ContainerAgentClient(node=self.container.node,
                                                 name=self.container.name)
    self._haa_pid = self.container_client.spawn_process(
        name=self._haa_name,
        module="ion.agents.cei.high_availability_agent",
        cls="HighAvailabilityAgent",
        config=self._haa_config)

    # Resource agent client addressed by the same resource_id that was
    # placed in the agent config above.
    self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id,
                                                      process=FakeProcess())
    log.info("Got haa client %s.", str(self._haa_pyon_client))
    self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
class ProcessDispatcherEEAgentIntTest(ProcessDispatcherServiceIntTest):
    """Run the basic int tests again, with a different environment

    Here the process dispatcher is started as an app with ``pd_config`` and
    processes are run through execution engine agents spawned by the test.
    """

    def setUp(self):
        """Start the PD app, a dashi connection, and one node with an eeagent."""
        # Set first so tearDown's ``if self.dashi`` check is always valid.
        self.dashi = None
        self._start_container()
        from pyon.public import CFG

        self.container_client = ContainerAgentClient(node=self.container.node,
                                                     name=self.container.name)
        self.container = self.container_client._get_container_instance()

        app = dict(name="process_dispatcher",
                   processapp=("process_dispatcher",
                               "ion.services.cei.process_dispatcher_service",
                               "ProcessDispatcherService"))
        self.container.start_app(app, config=pd_config)

        self.rr_cli = self.container.resource_registry

        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        self.process_definition = ProcessDefinition(name='test_process')
        self.process_definition.executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess'
        }
        self.process_definition_id = self.pd_cli.create_process_definition(
            self.process_definition)

        # Bookkeeping for every eeagent spawned through _start_eeagent().
        self._eea_pids = []
        self._eea_pid_to_resource_id = {}
        self._eea_pid_to_persistence_dir = {}
        self._tmpdirs = []

        self.dashi = get_dashi(
            uuid.uuid4().hex,
            pd_config['processdispatcher']['dashi_uri'],
            pd_config['processdispatcher']['dashi_exchange'],
            sysname=CFG.get_safe("dashi.sysname"))

        #send a fake node_state message to PD's dashi binding.
        self.node1_id = uuid.uuid4().hex
        self._send_node_state("engine1", self.node1_id)
        self._initial_eea_pid = self._start_eeagent(self.node1_id)

        self.waiter = ProcessStateWaiter()

    def _send_node_state(self, engine_id, node_id=None):
        """Fire a RUNNING ``node_state`` message at the PD's dashi name.

        A random node id is generated when ``node_id`` is not given. The
        domain id is derived from ``engine_id``.
        """
        node_id = node_id or uuid.uuid4().hex
        node_state = dict(node_id=node_id,
                          state=InstanceState.RUNNING,
                          domain_id=domain_id_from_engine(engine_id))
        self.dashi.fire(get_pd_dashi_name(), "node_state", args=node_state)

    def _start_eeagent(self, node_id, resource_id=None, persistence_dir=None):
        """Spawn an ExecutionEngineAgent for ``node_id`` and return its pid.

        When ``persistence_dir`` is not given a temp directory is created and
        remembered for removal in tearDown(). When ``resource_id`` is not
        given a random one is generated.
        """
        if not persistence_dir:
            persistence_dir = tempfile.mkdtemp()
            self._tmpdirs.append(persistence_dir)
        resource_id = resource_id or uuid.uuid4().hex
        agent_config = _get_eeagent_config(node_id, persistence_dir,
                                           resource_id=resource_id)
        pid = self.container_client.spawn_process(
            name="eeagent",
            module="ion.agents.cei.execution_engine_agent",
            cls="ExecutionEngineAgent",
            config=agent_config)
        log.info('Agent pid=%s.', str(pid))
        self._eea_pids.append(pid)
        self._eea_pid_to_resource_id[pid] = resource_id
        self._eea_pid_to_persistence_dir[pid] = persistence_dir
        return pid

    def _kill_eeagent(self, pid):
        """Terminate a tracked eeagent process and drop its bookkeeping."""
        self.assertIn(pid, self._eea_pids)
        self.container.terminate_process(pid)
        self._eea_pids.remove(pid)
        del self._eea_pid_to_resource_id[pid]
        del self._eea_pid_to_persistence_dir[pid]

    def tearDown(self):
        """Kill all eeagents, remove temp dirs, stop the waiter and dashi.

        Each cleanup stage runs even if an earlier one raises, so a failure
        while killing an agent does not leak temp directories, the state
        waiter or the dashi connection.
        """
        try:
            # Iterate over a copy: _kill_eeagent mutates self._eea_pids.
            for pid in list(self._eea_pids):
                self._kill_eeagent(pid)
        finally:
            try:
                for d in self._tmpdirs:
                    shutil.rmtree(d)
            finally:
                try:
                    self.waiter.stop()
                finally:
                    if self.dashi:
                        self.dashi.cancel()

    def test_requested_ee(self):
        """Schedule processes against specific execution engines."""
        # request non-default engine

        process_target = ProcessTarget(execution_engine_id="engine2")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.WAITING)

        # request unknown engine, with NEVER queuing mode. The request
        # should be rejected.
        # verifies L4-CI-CEI-RQ52

        process_target = ProcessTarget(execution_engine_id="not-a-real-ee")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        rejected_pid = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=rejected_pid)

        self.waiter.await_state_event(rejected_pid, ProcessStateEnum.REJECTED)

        # now add a node and eeagent for engine2. original process should leave
        # queue and start running
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine2", node2_id)
        self._start_eeagent(node2_id)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # spawn another process. it should start immediately.

        process_target = ProcessTarget(execution_engine_id="engine2")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        pid2 = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid2)

        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # one more with node exclusive

        process_target = ProcessTarget(execution_engine_id="engine2",
                                       node_exclusive="hats")
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.NEVER
        process_schedule.target = process_target

        pid3 = self.pd_cli.create_process(self.process_definition_id)

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid3)

        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # kill the processes for good
        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.TERMINATED)

    def test_node_exclusive(self):
        """Schedule processes sharing a node_exclusive tag one by one."""
        # the node_exclusive constraint is used to ensure multiple processes
        # of the same "kind" each get a VM exclusive of each other. Other
        # processes may run on these VMs, just not processes with the same
        # node_exclusive tag. Since we cannot directly query the contents
        # of each node in this test, we prove the capability by scheduling
        # processes one by one and checking their state.

        # verifies L4-CI-CEI-RQ121
        # verifies L4-CI-CEI-RQ57

        # first off, setUp() created a single node and eeagent.
        # We schedule two processes with the same "abc" node_exclusive
        # tag. Since there is only one node, the first process should run
        # and the second should be queued.

        process_target = ProcessTarget(execution_engine_id="engine1")
        process_target.node_exclusive = "abc"
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        pid1 = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start()

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid1)

        self.waiter.await_state_event(pid1, ProcessStateEnum.RUNNING)

        pid2 = self.pd_cli.create_process(self.process_definition_id)
        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.WAITING)

        # now demonstrate that the node itself is not full by launching
        # a third process without a node_exclusive tag -- it should start
        # immediately

        process_target.node_exclusive = None
        pid3 = self.pd_cli.create_process(self.process_definition_id)
        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.RUNNING)

        # finally, add a second node to the engine. pid2 should be started
        # since there is an exclusive "abc" node free.
        node2_id = uuid.uuid4().hex
        self._send_node_state("engine1", node2_id)
        self._start_eeagent(node2_id)
        self.waiter.await_state_event(pid2, ProcessStateEnum.RUNNING)

        # kill the processes for good
        self.pd_cli.cancel_process(pid1)
        self.waiter.await_state_event(pid1, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid2)
        self.waiter.await_state_event(pid2, ProcessStateEnum.TERMINATED)
        self.pd_cli.cancel_process(pid3)
        self.waiter.await_state_event(pid3, ProcessStateEnum.TERMINATED)

    def test_code_download(self):
        """A definition with a URL is expected to run; one without, to fail."""
        # create a process definition that has no URL; only module and class.
        process_definition_no_url = ProcessDefinition(
            name='test_process_nodownload')
        process_definition_no_url.executable = {
            'module': 'ion.my.test.process',
            'class': 'TestProcess'
        }
        process_definition_id_no_url = self.pd_cli.create_process_definition(
            process_definition_no_url)

        # create another that has a URL of the python file (this very file)
        # verifies L4-CI-CEI-RQ114
        url = "file://%s" % os.path.join(os.path.dirname(__file__),
                                         'test_process_dispatcher.py')
        process_definition = ProcessDefinition(name='test_process_download')
        process_definition.executable = {
            'module': 'ion.my.test.process',
            'class': 'TestProcess',
            'url': url
        }
        process_definition_id = self.pd_cli.create_process_definition(
            process_definition)

        process_target = ProcessTarget()
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        process_schedule.target = process_target

        self.waiter.start()

        # Test a module with no download fails
        pid_no_url = self.pd_cli.create_process(process_definition_id_no_url)

        self.pd_cli.schedule_process(process_definition_id_no_url,
                                     process_schedule, process_id=pid_no_url)

        self.waiter.await_state_event(pid_no_url, ProcessStateEnum.FAILED)

        # Test a module with a URL runs
        pid = self.pd_cli.create_process(process_definition_id)

        self.pd_cli.schedule_process(process_definition_id,
                                     process_schedule, process_id=pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

    def _add_test_process(self, restart_mode=None):
        """Create and schedule a test process; return ``(pid, client)``.

        The process is configured with a unique listen name and the returned
        TestClient is addressed to that name. ``restart_mode`` is applied to
        the schedule only when it is not None.
        """
        process_schedule = ProcessSchedule()
        if restart_mode is not None:
            process_schedule.restart_mode = restart_mode
        pid = self.pd_cli.create_process(self.process_definition_id)

        pid_listen_name = "PDtestproc_%s" % uuid.uuid4().hex
        config = {'process': {'listen_name': pid_listen_name}}

        self.pd_cli.schedule_process(self.process_definition_id,
                                     process_schedule, process_id=pid,
                                     configuration=config)

        client = TestClient(to_name=pid_listen_name)
        return pid, client

    def test_restart(self):
        """Kill the eeagent and its processes; check which ones come back."""
        self.waiter.start()

        restartable_pids = []
        nonrestartable_pids = []
        clients = {}
        # start 10 processes with RestartMode.ALWAYS
        for _ in range(10):
            pid, client = self._add_test_process(ProcessRestartMode.ALWAYS)
            restartable_pids.append(pid)
            clients[pid] = client

        # and 10 processes with RestartMode.ABNORMAL
        for _ in range(10):
            pid, client = self._add_test_process(ProcessRestartMode.ABNORMAL)
            restartable_pids.append(pid)
            clients[pid] = client

        # and 10 with RestartMode.NEVER
        for _ in range(10):
            pid, client = self._add_test_process(ProcessRestartMode.NEVER)
            nonrestartable_pids.append(pid)
            clients[pid] = client

        all_pids = restartable_pids + nonrestartable_pids

        self.waiter.await_many_state_events(all_pids, ProcessStateEnum.RUNNING)

        for pid in all_pids:
            client = clients[pid]
            self.assertFalse(client.is_restart())
            self.assertEqual(client.count(), 1)

        # now kill the whole eeagent and restart it. processes should
        # show up as FAILED in the next heartbeat.
        # Read the ids first: _kill_eeagent deletes these map entries.
        resource_id = self._eea_pid_to_resource_id[self._initial_eea_pid]
        persistence_dir = self._eea_pid_to_persistence_dir[
            self._initial_eea_pid]
        log.debug("Restarting eeagent %s", self._initial_eea_pid)
        self._kill_eeagent(self._initial_eea_pid)

        # manually kill the processes to simulate a real container failure
        for pid in all_pids:
            self.container.terminate_process(pid)

        self._start_eeagent(self.node1_id, resource_id=resource_id,
                            persistence_dir=persistence_dir)

        # wait for restartables to restart
        self.waiter.await_many_state_events(restartable_pids,
                                            ProcessStateEnum.RUNNING)

        # query the processes again. it should have restart mode config
        for pid in restartable_pids:
            client = clients[pid]
            self.assertTrue(client.is_restart())
            self.assertEqual(client.count(), 1)

        # meanwhile some procs should not have restarted
        for pid in nonrestartable_pids:
            proc = self.pd_cli.read_process(pid)
            self.assertEqual(proc.process_state, ProcessStateEnum.FAILED)

        # guard against extraneous events we were receiving as part of a bug:
        # processes restarting again after they were already restarted
        self.waiter.await_nothing(timeout=5)

    def test_idempotency(self):
        """Repeat schedule and cancel calls and check they are harmless."""
        # ensure every operation can be safely retried
        process_schedule = ProcessSchedule()
        process_schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        proc_name = 'myreallygoodname'
        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start(pid)

        # note: if we import UNSCHEDULED state into ProcessStateEnum,
        # this assertion will need to change.
        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_state, ProcessStateEnum.REQUESTED)

        pid2 = self.pd_cli.schedule_process(self.process_definition_id,
                                            process_schedule,
                                            configuration={}, process_id=pid,
                                            name=proc_name)
        self.assertEqual(pid, pid2)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        # repeating schedule is harmless
        pid2 = self.pd_cli.schedule_process(self.process_definition_id,
                                            process_schedule,
                                            configuration={}, process_id=pid,
                                            name=proc_name)
        self.assertEqual(pid, pid2)

        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_configuration, {})
        self.assertEqual(proc.process_state, ProcessStateEnum.RUNNING)

        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)

        # repeating cancel is harmless
        self.pd_cli.cancel_process(pid)
        proc = self.pd_cli.read_process(pid)
        self.assertEqual(proc.process_id, pid)
        self.assertEqual(proc.process_configuration, {})
        self.assertEqual(proc.process_state, ProcessStateEnum.TERMINATED)
class ProcessDispatcherServiceIntTest(IonIntegrationTestCase):
    """Integration tests for the process dispatcher service, run against the
    services started from the r2cei deployment file.
    """

    def setUp(self):
        """Start the container and services, and register a test process definition."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')

        self.rr_cli = ResourceRegistryServiceClient()
        self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        test_executable = {
            'module': 'ion.services.cei.test.test_process_dispatcher',
            'class': 'TestProcess'
        }
        self.process_definition = ProcessDefinition(name='test_process')
        self.process_definition.executable = test_executable
        self.process_definition_id = self.pd_cli.create_process_definition(
            self.process_definition)

        self.waiter = ProcessStateWaiter()

    def tearDown(self):
        """Stop the process state waiter."""
        self.waiter.stop()

    def test_create_schedule_cancel(self):
        """Create, schedule, cancel, reschedule and finally cancel one process."""
        schedule = ProcessSchedule()
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS
        definition_id = self.process_definition_id
        proc_name = 'myreallygoodname'

        pid = self.pd_cli.create_process(definition_id)
        self.waiter.start(pid)

        scheduled_pid = self.pd_cli.schedule_process(
            definition_id, schedule,
            configuration={}, process_id=pid, name=proc_name)
        self.assertEqual(pid, scheduled_pid)

        # verifies L4-CI-CEI-RQ141 and L4-CI-CEI-RQ142
        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        pd_record = self.pd_cli.read_process(pid)
        self.assertEqual(pd_record.process_id, pid)
        self.assertEqual(pd_record.process_configuration, {})
        self.assertEqual(pd_record.process_state, ProcessStateEnum.RUNNING)

        # the process must also be readable straight from the resource
        # registry (mirrored)
        # verifies L4-CI-CEI-RQ63
        # verifies L4-CI-CEI-RQ64
        rr_record = self.rr_cli.read(pid)
        self.assertEqual(rr_record.process_id, pid)

        # talk to the process itself to make sure it is really running
        test_client = TestClient()
        for expected_count in range(1, 6):
            self.assertEqual(expected_count, test_client.count(timeout=10))

        # verifies L4-CI-CEI-RQ147
        # the process name must have been set in the container
        name_in_container = test_client.get_process_name(pid=scheduled_pid)
        self.assertEqual(proc_name, name_in_container)

        # cancel the process, then schedule it again under the same id
        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)

        scheduled_pid = self.pd_cli.schedule_process(
            definition_id, schedule,
            configuration={}, process_id=pid)
        self.assertEqual(pid, scheduled_pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        for expected_count in range(1, 6):
            self.assertEqual(expected_count, test_client.count(timeout=10))

        # final cancel
        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)

    def test_schedule_with_config(self):
        """Schedule with a configuration block and read it back two ways."""
        schedule = ProcessSchedule()
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        pid = self.pd_cli.create_process(self.process_definition_id)
        self.waiter.start(pid)

        # verifies L4-CI-CEI-RQ66

        # hand the process a random string it is expected to return, which
        # shows the configuration reached the instantiated process
        test_response = uuid.uuid4().hex
        configuration = {"test_response": test_response}

        scheduled_pid = self.pd_cli.schedule_process(
            self.process_definition_id, schedule,
            configuration=configuration, process_id=pid)
        self.assertEqual(pid, scheduled_pid)

        self.waiter.await_state_event(pid, ProcessStateEnum.RUNNING)

        test_client = TestClient()

        # verifies L4-CI-CEI-RQ139
        # the configuration block (which can contain inputs, outputs,
        # and arbitrary config) 1) makes it to the process and 2) is returned
        # in process queries

        self.assertEqual(test_client.query(), test_response)

        pd_record = self.pd_cli.read_process(pid)
        self.assertEqual(pd_record.process_id, pid)
        self.assertEqual(pd_record.process_configuration, configuration)

        # final cancel
        self.pd_cli.cancel_process(pid)
        self.waiter.await_state_event(pid, ProcessStateEnum.TERMINATED)

    def test_schedule_bad_config(self):
        """Scheduling with a non-serializable configuration raises BadRequest."""
        schedule = ProcessSchedule()

        # a non-JSON-serializable IonObject
        unserializable = ProcessTarget()
        bad_configuration = {"bad": unserializable}

        with self.assertRaises(BadRequest) as raised:
            self.pd_cli.schedule_process(self.process_definition_id,
                                         schedule,
                                         configuration=bad_configuration)
        self.assertTrue(raised.exception.message.startswith("bad configuration"))

    def test_cancel_notfound(self):
        """Cancelling an unknown process id raises NotFound."""
        with self.assertRaises(NotFound):
            self.pd_cli.cancel_process("not-a-real-process-id")

    def test_create_invalid_definition(self):
        """A definition with a url but no module/class raises BadRequest."""
        # verifies L4-CI-CEI-RQ137
        url_only_executable = dict(url="http://somewhere.com/something.py")
        invalid_definition = ProcessDefinition(name="test_process",
                                               executable=url_only_executable)
        with self.assertRaises(BadRequest):
            self.pd_cli.create_process_definition(invalid_definition)
class BaseHighAvailabilityAgentTest(IonIntegrationTestCase):
    """Shared fixture for HA agent integration tests.

    setUp() registers a process definition and a service definition, spawns
    the HA agent with the config from _get_haagent_config() and builds
    ``self.haa_client``. The helper methods let tests respawn or kill the
    agent and wait for agent or Service states.
    """

    @needs_epu
    def setUp(self):
        """Start the CEI deployment, register definitions and spawn the agent."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')
        self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

        self.process_definition_id = uuid4().hex
        self.process_definition_name = 'test_haagent_%s' % self.process_definition_id
        self.process_definition = ProcessDefinition(
            name=self.process_definition_name,
            executable={
                'module': 'ion.agents.cei.test.test_haagent',
                'class': 'TestProcess'
            })
        self.pd_cli.create_process_definition(self.process_definition,
                                              self.process_definition_id)

        # Service definition named after the process definition; it is
        # deleted again in tearDown().
        service_definition = SERVICE_DEFINITION_TMPL % self.process_definition_name
        sd = IonObject(RT.ServiceDefinition, {"name": self.process_definition_name,
                                              "definition": service_definition})
        self.service_def_id, _ = self.container.resource_registry.create(sd)

        self.resource_id = "haagent_1234"
        self._haa_name = "high_availability_agent"
        self._haa_dashi_name = "dashi_haa_" + uuid4().hex
        self._haa_dashi_uri = get_dashi_uri_from_cfg()
        self._haa_dashi_exchange = "hatests"
        self._haa_config = self._get_haagent_config()

        # Baselines recorded before the agent starts, used by
        # get_new_services() and get_running_procs() to filter results.
        self._base_services, _ = self.container.resource_registry.find_resources(
            restype="Service", name=self.process_definition_name)
        self._base_procs = self.pd_cli.list_processes()

        self.waiter = ProcessStateWaiter()
        self.waiter.start()

        self.container_client = ContainerAgentClient(node=self.container.node,
                                                     name=self.container.name)

        self._spawn_haagent()

        self._setup_haa_client()

    def _get_haagent_config(self):
        """Return the agent config: 'npreserving' policy with preserve_n=0."""
        return {
            'highavailability': {
                'policy': {
                    'interval': 1,
                    'name': 'npreserving',
                    'parameters': {
                        'preserve_n': 0
                    }
                },
                'process_definition_id': self.process_definition_id,
                'dashi_messaging': True,
                'dashi_exchange': self._haa_dashi_exchange,
                'dashi_name': self._haa_dashi_name
            },
            'agent': {'resource_id': self.resource_id},
        }

    def _setup_haa_client(self):
        """Build the HA agent client on top of a resource agent client."""
        # Addressed by the resource_id given to the HA agent in its config.
        self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id,
                                                          process=FakeProcess())
        self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)

    def _spawn_haagent(self, policy_parameters=None):
        """Spawn the HA agent and store its pid in ``self._haa_pid``.

        ``policy_parameters``, when given, replaces the policy parameters in
        a deep copy of ``self._haa_config``; the stored config is untouched.
        """
        config = deepcopy(self._haa_config)
        if policy_parameters is not None:
            config['highavailability']['policy']['parameters'] = policy_parameters
        self._haa_pid = self.container_client.spawn_process(
            name=self._haa_name,
            module="ion.agents.cei.high_availability_agent",
            cls="HighAvailabilityAgent",
            config=config)

    def _kill_haagent(self):
        """Terminate the HA agent process spawned by _spawn_haagent()."""
        self.container.terminate_process(self._haa_pid)

    def tearDown(self):
        """Scale back to zero processes, then tear everything down."""
        new_policy = {'preserve_n': 0}
        self.haa_client.reconfigure_policy(new_policy)

        self.assertEqual(len(self.get_running_procs()), 0)
        self.await_ha_state('STEADY')

        self.waiter.stop()
        try:
            self._kill_haagent()
        except BadRequest:
            log.warning("Couldn't terminate HA Agent in teardown (May have been terminated by a test)")
        self.container.resource_registry.delete(self.service_def_id,
                                                del_associations=True)
        self._stop_container()

    def get_running_procs(self):
        """returns a normalized set of running procs (removes the ones that
        were there at setup time)
        """
        base = self._base_procs
        base_pids = [proc.process_id for proc in base]
        current = self.pd_cli.list_processes()
        current_pids = [proc.process_id for proc in current]
        # Single parenthesised argument: prints the same text as the print
        # statement form.
        print("filtering base procs %s from %s" % (base_pids, current_pids))
        normal = [cproc for cproc in current
                  if cproc.process_id not in base_pids
                  and cproc.process_state == ProcessStateEnum.RUNNING]
        return normal

    def get_new_services(self):
        """Return Service resources named like the test process definition
        whose names were not present in the baseline taken in setUp().
        """
        base = self._base_services
        base_names = [i.name for i in base]
        services_registered, _ = self.container.resource_registry.find_resources(
            restype="Service", name=self.process_definition_name)
        normal = [cserv for cserv in services_registered
                  if cserv.name not in base_names]
        return normal

    def await_ha_state(self, want_state, timeout=20):
        """Poll the HA agent status until it equals ``want_state``.

        Makes up to ``timeout`` polls one second apart. Errors while polling
        are logged and the poll is retried. Raises ``Exception`` if the state
        is never observed.
        """
        for i in range(0, timeout):
            try:
                status = self.haa_client.status().result
                if status == want_state:
                    return
                num_procs = len(self.get_running_procs())
                log.debug("assert wants state %s, got state %s, with %s procs",
                          want_state, status, num_procs)
            except Exception:
                log.exception("Problem getting HA status, trying again...")
            # One second between polls, whether the poll failed or simply
            # did not match.
            gevent.sleep(1)

        raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state))

    def await_pyon_ha_state(self, want_state, timeout=20):
        """Poll the agent's Service resource until its state is ``want_state``.

        The service id is taken from the agent's dump() result and the
        Service is read from the resource registry. Makes up to ``timeout``
        polls one second apart. Errors while polling are logged and the poll
        is retried. Raises ``Exception`` if the state is never observed.
        """
        for i in range(0, timeout):
            try:
                result = self.haa_client.dump().result
                service_id = result.get('service_id')
                service = self.container.resource_registry.read(service_id)

                if service.state == want_state:
                    return
                # The format arguments go to log.debug itself. Applying ``%``
                # to its return value (None) raised TypeError on every
                # non-matching poll.
                log.debug("want state %s, got state %s",
                          want_state, service.state)
            except Exception:
                log.exception("Problem getting HA status, trying again...")
            # One second between polls, whether the poll failed or simply
            # did not match.
            gevent.sleep(1)

        raise Exception("Took more than %s to get to pyon ha state %s" % (timeout, want_state))
class HighAvailabilityAgentSensorPolicyTest(IonIntegrationTestCase):
    """Exercises the HA agent configured with the 'sensor' policy.

    setUp starts a small local HTTP server and points the agent's
    'aggregator' config at it; the test controls the metric the agent sees
    by changing the body that server returns.
    """

    def _start_webserver(self, port=None):
        """Start a local HTTP server that answers every GET with
        ``self._webserver.response``.

        Note: tries really hard to get a port, and if it can't use
        the suggested port, randomly picks another, and returns it

        :param port: preferred port; defaults to 8008.
        :returns: the port the server was actually bound to.
        :raises Exception: if no port could be bound after 100 attempts.
        """
        class TestRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
            server_version = 'test_server'
            extensions_map = ''

            def log_message(self, format, *args):
                # swallow log messages
                pass

            def do_GET(self):
                self.send_response(200)
                self.send_header("Content-type", "text/plain")
                self.send_header("Content-Length", len(self.server.response))
                self.end_headers()
                self.wfile.write(self.server.response)

        class Server(HTTPServer):
            response = ''

            def serve_forever(self):
                # Handle one request at a time so stop() can end the loop.
                self._serving = 1
                while self._serving:
                    self.handle_request()

            def stop(self):
                self._serving = 0

        if port is None:
            port = 8008

        for _ in range(100):
            try:
                self._webserver = Server(("localhost", port), TestRequestHandler)
            except socket.error:
                print("port %s is in use, picking another" % port)
                port = randint(8000, 10000)
            else:
                break
        else:
            # Fail with a clear message instead of an AttributeError on
            # self._webserver further down.
            raise Exception("Could not bind the test webserver to any port")

        self._web_glet = gevent.spawn(self._webserver.serve_forever)
        return port

    def _stop_webserver(self):
        """Stop the test webserver, if one was started, and free its port."""
        webserver = getattr(self, '_webserver', None)
        if webserver is None:
            return
        webserver.stop()
        gevent.sleep(2)
        web_glet = getattr(self, '_web_glet', None)
        if web_glet is not None:
            web_glet.kill()
        # Close the listening socket so the port is released.
        webserver.server_close()

    @needs_epu
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2cei.yml')
        self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher")

        self.process_definition_id = uuid4().hex
        self.process_definition = ProcessDefinition(name='test', executable={
            'module': 'ion.agents.cei.test.test_haagent',
            'class': 'TestProcess'
        })
        self.pd_cli.create_process_definition(self.process_definition,
                                              self.process_definition_id)

        # 8919 is only the preferred port; use whatever was actually bound.
        http_port = self._start_webserver(port=8919)

        self.resource_id = "haagent_4567"
        self._haa_name = "high_availability_agent"
        self._haa_config = {
            'highavailability': {
                'policy': {
                    'interval': 1,
                    'name': 'sensor',
                    'parameters': {
                        'metric': 'app_attributes:ml',
                        'sample_period': 600,
                        'sample_function': 'Average',
                        'cooldown_period': 20,
                        'scale_up_threshold': 2.0,
                        'scale_up_n_processes': 1,
                        'scale_down_threshold': 1.0,
                        'scale_down_n_processes': 1,
                        'maximum_processes': 5,
                        'minimum_processes': 1,
                    }
                },
                'aggregator': {
                    'type': 'trafficsentinel',
                    'host': 'localhost',
                    'port': http_port,
                    'protocol': 'http',
                    'username': '******',
                    'password': '******'
                },
                'process_definition_id': self.process_definition_id,
                "process_dispatchers": [
                    'process_dispatcher'
                ]
            },
            'agent': {'resource_id': self.resource_id},
        }

        # Snapshot of pre-existing processes, filtered out by get_running_procs().
        self._base_procs = self.pd_cli.list_processes()

        self.waiter = ProcessStateWaiter()
        self.waiter.start()

        self.container_client = ContainerAgentClient(node=self.container.node,
                                                     name=self.container.name)
        self._haa_pid = self.container_client.spawn_process(
            name=self._haa_name,
            module="ion.agents.cei.high_availability_agent",
            cls="HighAvailabilityAgent",
            config=self._haa_config)

        # Start a resource agent client to talk with the instrument agent.
        self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess())
        log.info('Got haa client %s.', str(self._haa_pyon_client))

        self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)

    def tearDown(self):
        """Stop the waiter, HA agent, webserver and container.

        Nested ``try/finally`` keeps a failure in one step from skipping the
        later ones, so the webserver and the container are always stopped.
        """
        try:
            self.waiter.stop()
            self.container.terminate_process(self._haa_pid)
        finally:
            try:
                self._stop_webserver()
            finally:
                self._stop_container()

    def get_running_procs(self):
        """returns a normalized set of running procs (removes the ones that
        were there at setup time)
        """
        base_pids = [proc.process_id for proc in self._base_procs]
        current = self.pd_cli.list_processes()
        current_pids = [proc.process_id for proc in current]
        # Single-argument print(...) emits the same text as the print statement.
        print("filtering base procs %s from %s" % (base_pids, current_pids))
        return [
            cproc for cproc in current
            if cproc.process_id not in base_pids
            and cproc.process_state == ProcessStateEnum.RUNNING
        ]

    def _get_managed_upids(self):
        """Return the 'managed_upids' entry of the agent's dump() result."""
        result = self.haa_client.dump().result
        return result['managed_upids']

    def _set_response(self, response):
        """Set the body the test webserver returns for every GET."""
        self._webserver.response = response

    def _report_ml(self, ml):
        """Serve one ``<upid>,ml=<ml>`` line per managed process.

        ``ml`` is passed as a string so it is rendered exactly as written.
        """
        upids = self._get_managed_upids()
        self._set_response("".join("%s,ml=%s\n" % (upid, ml) for upid in upids))

    def _await_steady(self, attempts=5):
        """Poll the HA status once a second until it is 'STEADY'.

        Fails the test if the state is not reached within ``attempts`` polls.
        """
        for _ in range(attempts):
            if self.haa_client.status().result == 'STEADY':
                return
            gevent.sleep(1)
        self.fail("HA Service took too long to get to state STEADY")

    def test_sensor_policy(self):
        status = self.haa_client.status().result
        # Ensure HA hasn't already failed
        self.assertIn(status, ('PENDING', 'READY', 'STEADY'))

        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)
        self.assertEqual(len(self.get_running_procs()), 1)
        self._await_steady()

        # Set ml for each proc such that we scale up
        self._report_ml('5')
        self.waiter.await_state_event(state=ProcessStateEnum.RUNNING)
        self.assertEqual(len(self.get_running_procs()), 2)

        # Set ml so we stay steady
        self._report_ml('1.5')
        self.assertEqual(len(self.get_running_procs()), 2)
        self._await_steady()

        # Set ml so we scale down
        self._report_ml('0.5')
        self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED)
        self.assertEqual(len(self.get_running_procs()), 1)
        self._await_steady()