def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition_name = 'test' self.process_definition = ProcessDefinition(name=self.process_definition_name, executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) self.resource_id = "haagent_1234" self._haa_name = "high_availability_agent" self._haa_dashi_name = "dashi_haa_" + uuid4().hex self._haa_dashi_uri = get_dashi_uri_from_cfg() self._haa_dashi_exchange = "%s.hatests" % bootstrap.get_sys_name() self._haa_config = { 'highavailability': { 'policy': { 'interval': 1, 'name': 'npreserving', 'parameters': { 'preserve_n': 0 } }, 'process_definition_id': self.process_definition_id, 'dashi_messaging' : True, 'dashi_exchange' : self._haa_dashi_exchange, 'dashi_name': self._haa_dashi_name }, 'agent': {'resource_id': self.resource_id}, } self._base_services, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
def setUp(self): self._start_container() self.container.start_rel_from_url("res/deploy/r2cei.yml") # self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition_name = "test" self.process_definition = ProcessDefinition( name=self.process_definition_name, executable={"module": "ion.agents.cei.test.test_haagent", "class": "TestProcess"}, ) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) self.resource_id = "haagent_1234" self._haa_name = "high_availability_agent" self._haa_dashi_name = "dashi_haa_" + uuid4().hex self._haa_dashi_uri = get_dashi_uri_from_cfg() self._haa_dashi_exchange = "%s.hatests" % bootstrap.get_sys_name() self._haa_config = { "highavailability": { "policy": {"interval": 1, "name": "npreserving", "parameters": {"preserve_n": 0}}, "process_definition_id": self.process_definition_id, "dashi_messaging": True, "dashi_exchange": self._haa_dashi_exchange, "dashi_name": self._haa_dashi_name, }, "agent": {"resource_id": self.resource_id}, } self._base_services, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name ) self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process( name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config, ) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info("Got haa client %s.", str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.resource_id = "haagent_1234" self._haa_name = "high_availability_agent" self._haa_config = { 'highavailability': { 'policy': { 'interval': 1, 'name': 'npreserving', 'parameters': { 'preserve_n': 0 } }, 'process_spec': { 'name': 'test', 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }, "process_dispatchers": [ 'process_dispatcher' ] }, 'agent': {'resource_id': self.resource_id}, } self._base_procs = self.pd_cli.list_processes() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = ResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) self.event_queue = queue.Queue() self.event_sub = None
class HighAvailabilityAgentSensorPolicyTest(IonIntegrationTestCase): def _start_webserver(self, port=None): """ Start a webserver for testing code download Note: tries really hard to get a port, and if it can't use the suggested port, randomly picks another, and returns it """ def log_message(self, format, *args): #swallow log massages pass class TestRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): server_version = 'test_server' extensions_map = '' def do_GET(self): self.send_response(200) self.send_header("Content-type", "text/plain") self.send_header("Content-Length", len(self.server.response)) self.end_headers() self.wfile.write(self.server.response) class Server(HTTPServer): response = '' def serve_forever(self): self._serving = 1 while self._serving: self.handle_request() def stop(self): self._serving = 0 if port is None: port = 8008 Handler = TestRequestHandler Handler.log_message = log_message for i in range(0, 100): try: self._webserver = Server(("localhost", port), Handler) except socket.error: print "port %s is in use, picking another" % port port = randint(8000, 10000) continue else: break self._web_glet = gevent.spawn(self._webserver.serve_forever) return port def _stop_webserver(self): if self._webserver is not None: self._webserver.stop() gevent.sleep(2) self._web_glet.kill() def await_ha_state(self, want_state, timeout=20): for i in range(0, timeout): try: status = self.haa_client.status().result if status == want_state: return else: procs = self.get_running_procs() num_procs = len(procs) log.debug("assert wants state %s, got state %s, with %s procs" % (want_state,status, num_procs)) except Exception: log.exception("Problem getting HA status, trying again...") gevent.sleep(1) raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state)) @needs_epu def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition = ProcessDefinition(name='test', executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) http_port = 8919 http_port = self._start_webserver(port=http_port) self.resource_id = "haagent_4567" self._haa_name = "high_availability_agent" self._haa_config = { 'server': { 'trafficsentinel': { 'host': 'localhost', 'port': http_port, 'protocol': 'http', 'username': '******', 'password': '******' } }, 'highavailability': { 'policy': { 'interval': 1, 'name': 'sensor', 'parameters': { 'metric': 'app_attributes:ml', 'sample_period': 600, 'sample_function': 'Average', 'cooldown_period': 5, 'scale_up_threshold': 2.0, 'scale_up_n_processes': 1, 'scale_down_threshold': 1.0, 'scale_down_n_processes': 1, 'maximum_processes': 5, 'minimum_processes': 1, } }, 'process_definition_id': self.process_definition_id, "process_dispatchers": [ 'process_dispatcher' ] }, 'agent': {'resource_id': self.resource_id}, } self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) def tearDown(self): new_policy = { 'metric': 'app_attributes:ml', 'sample_period': 600, 'sample_function': 'Average', 'cooldown_period': 0, 'scale_up_threshold': 2.0, 'scale_up_n_processes': 1, 'scale_down_threshold': 1.0, 'scale_down_n_processes': 1, 'maximum_processes': 0, 'minimum_processes': 0, } self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 0) self.waiter.stop() self.container.terminate_process(self._haa_pid) self._stop_webserver() self._stop_container() def get_running_procs(self): """returns a normalized set of running procs (removes the ones that were there at setup time) """ base = self._base_procs base_pids = [proc.process_id for proc in base] current = self.pd_cli.list_processes() current_pids = [proc.process_id for proc in current] print "filtering base procs %s from %s" % (base_pids, current_pids) normal = [cproc for cproc in current if cproc.process_id not in base_pids and cproc.process_state == ProcessStateEnum.RUNNING] return normal def _get_managed_upids(self): result = self.haa_client.dump().result upids = result['managed_upids'] return upids def _set_response(self, response): self._webserver.response = response def test_sensor_policy(self): status = self.haa_client.status().result # Ensure HA hasn't already failed assert status in ('PENDING', 'READY', 'STEADY') self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 1) self.await_ha_state('STEADY') # Set ml for each proc such that we scale up upids = self._get_managed_upids() response = "" for upid in upids: response += "pid=%s&ml=5\n" % upid self._set_response(response) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 2) # Set ml so we stay steady upids = self._get_managed_upids() response = "" for upid in upids: response += "pid=%s&ml=1.5\n" % upid self._set_response(response) self.assertEqual(len(self.get_running_procs()), 2) self.await_ha_state('STEADY') # Set ml so we scale down upids = self._get_managed_upids() response = "" for upid in upids: response += "pid=%s&ml=0.5\n" % upid self._set_response(response) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 1) self.await_ha_state('STEADY')
def _setup_haa_client(self): # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
class HighAvailabilityAgentTest(IonIntegrationTestCase): @needs_epu def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition_name = 'test' self.process_definition = ProcessDefinition(name=self.process_definition_name, executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) self.resource_id = "haagent_1234" self._haa_name = "high_availability_agent" self._haa_dashi_name = "dashi_haa_" + uuid4().hex self._haa_dashi_uri = get_dashi_uri_from_cfg() self._haa_dashi_exchange = "%s.hatests" % bootstrap.get_sys_name() self._haa_config = { 'highavailability': { 'policy': { 'interval': 1, 'name': 'npreserving', 'parameters': { 'preserve_n': 0 } }, 'process_definition_id': self.process_definition_id, 'dashi_messaging' : True, 'dashi_exchange' : self._haa_dashi_exchange, 'dashi_name': self._haa_dashi_name }, 'agent': {'resource_id': self.resource_id}, } self._base_services, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) def tearDown(self): self.waiter.stop() try: self.container.terminate_process(self._haa_pid) except BadRequest: log.warning("Couldn't terminate HA Agent in teardown (May have been terminated by a test)") self._stop_container() def get_running_procs(self): """returns a normalized set of running procs (removes the ones that were there at setup time) """ base = self._base_procs base_pids = [proc.process_id for proc in base] current = self.pd_cli.list_processes() current_pids = [proc.process_id for proc in current] print "filtering base procs %s from %s" % (base_pids, current_pids) normal = [cproc for cproc in current if cproc.process_id not in base_pids and cproc.process_state == ProcessStateEnum.RUNNING] return normal def get_new_services(self): base = self._base_services base_names = [i.name for i in base] services_registered, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) current_names = [i.name for i in services_registered] normal = [cserv for cserv in services_registered if cserv.name not in base_names] return normal def await_ha_state(self, want_state, timeout=10): for i in range(0, timeout): status = self.haa_client.status().result if status == want_state: return gevent.sleep(1) raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state)) def test_features(self): status = self.haa_client.status().result # Ensure HA hasn't already failed assert status in ('PENDING', 'READY', 'STEADY') # verifies L4-CI-CEI-RQ44 # Note: the HA agent is started in the setUp() method, with config # pointing to the test "service". The initial config is set to preserve # 0 service processes. With this reconfigure step below, we change that # to launch 1. new_policy = {'preserve_n': 1} self.haa_client.reconfigure_policy(new_policy) result = self.haa_client.dump().result self.assertEqual(result['policy'], new_policy) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 1) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, 'STEADY') break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY" # verifies L4-CI-CEI-RQ122 and L4-CI-CEI-RQ124 new_policy = {'preserve_n': 2} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 2) new_policy = {'preserve_n': 1} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 1) new_policy = {'preserve_n': 0} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 0) def test_associations(self): # Ensure that once the HA Agent starts, there is a Service object in # the registry result = self.haa_client.dump().result service_id = result.get('service_id') self.assertIsNotNone(service_id) service = self.container.resource_registry.read(service_id) self.assertIsNotNone(service) # Ensure that once a process is started, there is an association between # it and the service new_policy = {'preserve_n': 1} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 1) self.await_ha_state('STEADY') proc = self.get_running_procs()[0] processes_associated, _ = self.container.resource_registry.find_resources( restype="Process", name=proc.process_id) self.assertEqual(len(processes_associated), 1) has_processes = self.container.resource_registry.find_associations( service, "hasProcess") self.assertEqual(len(has_processes), 1) self.await_ha_state('STEADY') # Ensure that once we terminate that process, there are no associations new_policy = {'preserve_n': 0} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 0) processes_associated, _ = self.container.resource_registry.find_resources( restype="Process", name=proc.process_id) self.assertEqual(len(processes_associated), 0) has_processes = self.container.resource_registry.find_associations( service, "hasProcess") self.assertEqual(len(has_processes), 0) # Ensure that once we terminate that HA Agent, the Service object is # cleaned up self.container.terminate_process(self._haa_pid) with self.assertRaises(NotFound): service = self.container.resource_registry.read(service_id) def test_dashi(self): import dashi dashi_conn = dashi.DashiConnection("something", self._haa_dashi_uri, self._haa_dashi_exchange) status = dashi_conn.call(self._haa_dashi_name, "status") assert status in ('PENDING', 'READY', 'STEADY') new_policy = {'preserve_n': 0} dashi_conn.call(self._haa_dashi_name, "reconfigure_policy", new_policy=new_policy)
def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition = ProcessDefinition(name='test', executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) http_port = 8919 http_port = self._start_webserver(port=http_port) self.resource_id = "haagent_4567" self._haa_name = "high_availability_agent" self._haa_config = { 'highavailability': { 'policy': { 'interval': 1, 'name': 'sensor', 'parameters': { 'metric': 'app_attributes:ml', 'sample_period': 600, 'sample_function': 'Average', 'cooldown_period': 20, 'scale_up_threshold': 2.0, 'scale_up_n_processes': 1, 'scale_down_threshold': 1.0, 'scale_down_n_processes': 1, 'maximum_processes': 5, 'minimum_processes': 1, } }, 'aggregator': { 'type': 'trafficsentinel', 'host': 'localhost', 'port': http_port, 'protocol': 'http', 'username': '******', 'password': '******' }, 'process_definition_id': self.process_definition_id, "process_dispatchers": [ 'process_dispatcher' ] }, 'agent': {'resource_id': self.resource_id}, } self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
class HighAvailabilityAgentSensorPolicyTest(IonIntegrationTestCase): def _start_webserver(self, port=None): """ Start a webserver for testing code download Note: tries really hard to get a port, and if it can't use the suggested port, randomly picks another, and returns it """ def log_message(self, format, *args): #swallow log massages pass class TestRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): server_version = 'test_server' extensions_map = '' def do_GET(self): self.send_response(200) self.send_header("Content-type", "text/plain") self.send_header("Content-Length", len(self.server.response)) self.end_headers() self.wfile.write(self.server.response) class Server(HTTPServer): response = '' def serve_forever(self): self._serving = 1 while self._serving: self.handle_request() def stop(self): self._serving = 0 if port is None: port = 8008 Handler = TestRequestHandler Handler.log_message = log_message for i in range(0, 100): try: self._webserver = Server(("localhost", port), Handler) except socket.error: print "port %s is in use, picking another" % port port = randint(8000, 10000) continue else: break self._web_glet = gevent.spawn(self._webserver.serve_forever) return port def _stop_webserver(self): if self._webserver is not None: self._webserver.stop() gevent.sleep(2) self._web_glet.kill() @needs_epu def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition = ProcessDefinition(name='test', executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) http_port = 8919 http_port = self._start_webserver(port=http_port) self.resource_id = "haagent_4567" self._haa_name = "high_availability_agent" self._haa_config = { 'highavailability': { 'policy': { 'interval': 1, 'name': 'sensor', 'parameters': { 'metric': 'app_attributes:ml', 'sample_period': 600, 'sample_function': 'Average', 'cooldown_period': 20, 'scale_up_threshold': 2.0, 'scale_up_n_processes': 1, 'scale_down_threshold': 1.0, 'scale_down_n_processes': 1, 'maximum_processes': 5, 'minimum_processes': 1, } }, 'aggregator': { 'type': 'trafficsentinel', 'host': 'localhost', 'port': http_port, 'protocol': 'http', 'username': '******', 'password': '******' }, 'process_definition_id': self.process_definition_id, "process_dispatchers": [ 'process_dispatcher' ] }, 'agent': {'resource_id': self.resource_id}, } self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) def tearDown(self): self.waiter.stop() self.container.terminate_process(self._haa_pid) self._stop_webserver() self._stop_container() def get_running_procs(self): """returns a normalized set of running procs (removes the ones that were there at setup time) """ base = self._base_procs base_pids = [proc.process_id for proc in base] current = self.pd_cli.list_processes() current_pids = [proc.process_id for proc in current] print "filtering base procs %s from %s" % (base_pids, current_pids) normal = [cproc for cproc in current if cproc.process_id not in base_pids and cproc.process_state == ProcessStateEnum.RUNNING] return normal def _get_managed_upids(self): result = self.haa_client.dump().result upids = result['managed_upids'] return upids def _set_response(self, response): self._webserver.response = response def test_sensor_policy(self): status = self.haa_client.status().result # Ensure HA hasn't already failed assert status in ('PENDING', 'READY', 'STEADY') self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 1) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, 'STEADY') break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY" # Set ml for each proc such that we scale up upids = self._get_managed_upids() response = "" for upid in upids: response += "%s,ml=5\n" self._set_response(response) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 2) # Set ml so we stay steady upids = self._get_managed_upids() response = "" for upid in upids: response += "%s,ml=1.5\n" self._set_response(response) self.assertEqual(len(self.get_running_procs()), 2) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, 'STEADY') break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY" # Set ml so we scale down upids = self._get_managed_upids() response = "" for upid in upids: response += "%s,ml=0.5\n" self._set_response(response) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 1) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, 'STEADY') break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY"
class BaseHighAvailabilityAgentTest(IonIntegrationTestCase): @needs_epu def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.pd_cli = ProcessDispatcherServiceClient( to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition_name = 'test_haagent_%s' % self.process_definition_id self.process_definition = ProcessDefinition( name=self.process_definition_name, executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) service_definition = SERVICE_DEFINITION_TMPL % self.process_definition_name sd = IonObject(RT.ServiceDefinition, { "name": self.process_definition_name, "definition": service_definition }) self.service_def_id, _ = self.container.resource_registry.create(sd) self.resource_id = "haagent_1234" self._haa_name = "high_availability_agent" self._haa_dashi_name = "dashi_haa_" + uuid4().hex self._haa_dashi_uri = get_dashi_uri_from_cfg() self._haa_dashi_exchange = "hatests" self._haa_config = self._get_haagent_config() self._base_services, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._spawn_haagent() self._setup_haa_client() def _get_haagent_config(self): return { 'highavailability': { 'policy': { 'interval': 1, 'name': 'npreserving', 'parameters': { 'preserve_n': 0 } }, 'process_definition_id': self.process_definition_id, 'dashi_messaging': True, 'dashi_exchange': self._haa_dashi_exchange, 'dashi_name': self._haa_dashi_name }, 'agent': { 'resource_id': self.resource_id }, } def _setup_haa_client(self): # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient( self.resource_id, process=FakeProcess()) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) def _spawn_haagent(self, policy_parameters=None): config = deepcopy(self._haa_config) if policy_parameters is not None: config['highavailability']['policy'][ 'parameters'] = policy_parameters self._haa_pid = self.container_client.spawn_process( name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=config) def _kill_haagent(self): self.container.terminate_process(self._haa_pid) def tearDown(self): new_policy = {'preserve_n': 0} self.haa_client.reconfigure_policy(new_policy) self.assertEqual(len(self.get_running_procs()), 0) self.await_ha_state('STEADY') self.waiter.stop() try: self._kill_haagent() except BadRequest: log.warning( "Couldn't terminate HA Agent in teardown (May have been terminated by a test)" ) self.container.resource_registry.delete(self.service_def_id, del_associations=True) self._stop_container() def get_running_procs(self): """returns a normalized set of running procs (removes the ones that were there at setup time) """ base = self._base_procs base_pids = [proc.process_id for proc in base] current = self.pd_cli.list_processes() current_pids = [proc.process_id for proc in current] print "filtering base procs %s from %s" % (base_pids, current_pids) normal = [ cproc for cproc in current if cproc.process_id not in base_pids and cproc.process_state == ProcessStateEnum.RUNNING ] return normal def get_new_services(self): base = self._base_services base_names = [i.name for i in base] services_registered, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) normal = [ cserv for cserv in services_registered if cserv.name not in base_names ] return normal def await_ha_state(self, want_state, timeout=20): for i in range(0, timeout): try: status = self.haa_client.status().result if status == want_state: return else: procs = self.get_running_procs() num_procs = len(procs) log.debug( "assert wants state %s, got state %s, with %s procs" % (want_state, status, num_procs)) except Exception: log.exception("Problem getting HA status, trying again...") gevent.sleep(1) raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state)) def await_pyon_ha_state(self, want_state, timeout=20): for i in range(0, timeout): try: result = self.haa_client.dump().result service_id = result.get('service_id') service = self.container.resource_registry.read(service_id) if service.state == want_state: return else: log.debug("want state %s, got state %s") % (want_state, service.state) except Exception: log.exception("Problem getting HA status, trying again...") gevent.sleep(1) raise Exception("Took more than %s to get to pyon ha state %s" % (timeout, want_state))
class HighAvailabilityAgentTest(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.resource_id = "haagent_1234" self._haa_name = "high_availability_agent" self._haa_config = { 'highavailability': { 'policy': { 'interval': 1, 'name': 'npreserving', 'parameters': { 'preserve_n': 0 } }, 'process_spec': { 'name': 'test', 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }, "process_dispatchers": [ 'process_dispatcher' ] }, 'agent': {'resource_id': self.resource_id}, } self._base_procs = self.pd_cli.list_processes() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = ResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) self.event_queue = queue.Queue() self.event_sub = None def tearDown(self): self.container.terminate_process(self._haa_pid) self._stop_container() def _event_callback(self, event, *args, **kwargs): self.event_queue.put(event) def subscribe_events(self, origin): self.event_sub = EventSubscriber(event_type="ProcessLifecycleEvent", callback=self._event_callback, origin_type="DispatchedProcess") self.event_sub.start() def await_state_event(self, pid, state): event = self.event_queue.get(timeout=10) log.debug("Got event: %s", event) self.assertTrue(event.origin.startswith(pid)) self.assertEqual(event.state, state) return event def get_procs(self): """returns a normalized set of procs (removes the ones that were there at setup time) """ base = self._base_procs current = self.pd_cli.list_processes() normal = [proc for proc in current if proc not in base] return normal @needs_epu def test_features(self): status = self.haa_client.status().result self.assertEqual(status, 'PENDING') new_policy = {'preserve_n': 1} self.haa_client.reconfigure_policy(new_policy) result = self.haa_client.dump().result self.assertEqual(result['policy'], new_policy) self.subscribe_events(None) self.await_state_event("test", ProcessStateEnum.SPAWN) self.assertEqual(len(self.get_procs()), 1) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, 'STEADY') break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY" new_policy = {'preserve_n': 2} self.haa_client.reconfigure_policy(new_policy) self.await_state_event("test", ProcessStateEnum.SPAWN) self.assertEqual(len(self.get_procs()), 2) new_policy = {'preserve_n': 1} self.haa_client.reconfigure_policy(new_policy) self.await_state_event("test", ProcessStateEnum.TERMINATE) self.assertEqual(len(self.get_procs()), 1) new_policy = {'preserve_n': 0} self.haa_client.reconfigure_policy(new_policy) self.await_state_event("test", ProcessStateEnum.TERMINATE) self.assertEqual(len(self.get_procs()), 0)
class HighAvailabilityAgentTest(IonIntegrationTestCase): @needs_epu def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition_name = 'test' self.process_definition = ProcessDefinition(name=self.process_definition_name, executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) self.resource_id = "haagent_1234" self._haa_name = "high_availability_agent" self._haa_dashi_name = "dashi_haa_" + uuid4().hex self._haa_dashi_uri = get_dashi_uri_from_cfg() self._haa_dashi_exchange = "%s.hatests" % bootstrap.get_sys_name() self._haa_config = { 'highavailability': { 'policy': { 'interval': 1, 'name': 'npreserving', 'parameters': { 'preserve_n': 0 } }, 'process_definition_id': self.process_definition_id, 'dashi_messaging' : True, 'dashi_exchange' : self._haa_dashi_exchange, 'dashi_name': self._haa_dashi_name }, 'agent': {'resource_id': self.resource_id}, } self._base_services, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) def tearDown(self): self.waiter.stop() try: self.container.terminate_process(self._haa_pid) except BadRequest: log.warning("Couldn't terminate HA Agent in teardown (May have been terminated by a test)") self._stop_container() def get_running_procs(self): """returns a normalized set of running procs (removes the ones that were there at setup time) """ base = self._base_procs base_pids = [proc.process_id for proc in base] current = self.pd_cli.list_processes() current_pids = [proc.process_id for proc in current] print "filtering base procs %s from %s" % (base_pids, current_pids) normal = [cproc for cproc in current if cproc.process_id not in base_pids and cproc.process_state == ProcessStateEnum.RUNNING] return normal def get_new_services(self): base = self._base_services base_names = [i.name for i in base] services_registered, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) current_names = [i.name for i in services_registered] normal = [cserv for cserv in services_registered if cserv.name not in base_names] return normal def await_ha_state(self, want_state, timeout=10): for i in range(0, timeout): status = self.haa_client.status().result if status == want_state: return gevent.sleep(1) raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state)) def test_features(self): status = self.haa_client.status().result # Ensure HA hasn't already failed assert status in ('PENDING', 'READY', 'STEADY') # verifies L4-CI-CEI-RQ44 # Note: the HA agent is started in the setUp() method, with config # pointing to the test "service". The initial config is set to preserve # 0 service processes. With this reconfigure step below, we change that # to launch 1. new_policy = {'preserve_n': 1} self.haa_client.reconfigure_policy(new_policy) result = self.haa_client.dump().result self.assertEqual(result['policy'], new_policy) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 1) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, 'STEADY') break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY" # Ensure Service object has the correct state result = self.haa_client.dump().result service_id = result.get('service_id') service = self.container.resource_registry.read(service_id) self.assertEqual(service.state, ServiceStateEnum.STEADY) # verifies L4-CI-CEI-RQ122 and L4-CI-CEI-RQ124 new_policy = {'preserve_n': 2} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 2) new_policy = {'preserve_n': 1} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 1) new_policy = {'preserve_n': 0} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 0) def test_associations(self): # Ensure that once the HA Agent starts, there is a Service object in # the registry result = self.haa_client.dump().result service_id = result.get('service_id') self.assertIsNotNone(service_id) service = self.container.resource_registry.read(service_id) self.assertIsNotNone(service) # Ensure that once a process is started, there is an association between # it and the service new_policy = {'preserve_n': 1} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 1) self.await_ha_state('STEADY') proc = self.get_running_procs()[0] processes_associated, _ = self.container.resource_registry.find_resources( restype="Process", name=proc.process_id) self.assertEqual(len(processes_associated), 1) has_processes = self.container.resource_registry.find_associations( service, "hasProcess") self.assertEqual(len(has_processes), 1) self.await_ha_state('STEADY') # Ensure that once we terminate that process, there are no associations new_policy = {'preserve_n': 0} self.haa_client.reconfigure_policy(new_policy) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 0) processes_associated, _ = self.container.resource_registry.find_resources( restype="Process", name=proc.process_id) self.assertEqual(len(processes_associated), 0) has_processes = self.container.resource_registry.find_associations( service, "hasProcess") self.assertEqual(len(has_processes), 0) # Ensure that once we terminate that HA Agent, the Service object is # cleaned up self.container.terminate_process(self._haa_pid) with self.assertRaises(NotFound): service = self.container.resource_registry.read(service_id) def test_dashi(self): import dashi dashi_conn = dashi.DashiConnection("something", self._haa_dashi_uri, self._haa_dashi_exchange) status = dashi_conn.call(self._haa_dashi_name, "status") assert status in ('PENDING', 'READY', 'STEADY') new_policy = {'preserve_n': 0} dashi_conn.call(self._haa_dashi_name, "reconfigure_policy", new_policy=new_policy)
def setUp(self): self._start_container() self.container.start_rel_from_url("res/deploy/r2cei.yml") self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition = ProcessDefinition( name="test", executable={"module": "ion.agents.cei.test.test_haagent", "class": "TestProcess"} ) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) http_port = 8919 http_port = self._start_webserver(port=http_port) self.resource_id = "haagent_4567" self._haa_name = "high_availability_agent" self._haa_config = { "highavailability": { "policy": { "interval": 1, "name": "sensor", "parameters": { "metric": "app_attributes:ml", "sample_period": 600, "sample_function": "Average", "cooldown_period": 20, "scale_up_threshold": 2.0, "scale_up_n_processes": 1, "scale_down_threshold": 1.0, "scale_down_n_processes": 1, "maximum_processes": 5, "minimum_processes": 1, }, }, "aggregator": { "type": "trafficsentinel", "host": "localhost", "port": http_port, "protocol": "http", "username": "******", "password": "******", }, "process_definition_id": self.process_definition_id, "process_dispatchers": ["process_dispatcher"], }, "agent": {"resource_id": self.resource_id}, } self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process( name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config, ) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info("Got haa client %s.", str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
class HighAvailabilityAgentSensorPolicyTest(IonIntegrationTestCase): def _start_webserver(self, port=None): """ Start a webserver for testing code download Note: tries really hard to get a port, and if it can't use the suggested port, randomly picks another, and returns it """ def log_message(self, format, *args): # swallow log massages pass class TestRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): server_version = "test_server" extensions_map = "" def do_GET(self): self.send_response(200) self.send_header("Content-type", "text/plain") self.send_header("Content-Length", len(self.server.response)) self.end_headers() self.wfile.write(self.server.response) class Server(HTTPServer): response = "" def serve_forever(self): self._serving = 1 while self._serving: self.handle_request() def stop(self): self._serving = 0 if port is None: port = 8008 Handler = TestRequestHandler Handler.log_message = log_message for i in range(0, 100): try: self._webserver = Server(("localhost", port), Handler) except socket.error: print "port %s is in use, picking another" % port port = randint(8000, 10000) continue else: break self._web_glet = gevent.spawn(self._webserver.serve_forever) return port def _stop_webserver(self): if self._webserver is not None: self._webserver.stop() gevent.sleep(2) self._web_glet.kill() @needs_epu def setUp(self): self._start_container() self.container.start_rel_from_url("res/deploy/r2cei.yml") self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition = ProcessDefinition( name="test", executable={"module": "ion.agents.cei.test.test_haagent", "class": "TestProcess"} ) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) http_port = 8919 http_port = self._start_webserver(port=http_port) self.resource_id = "haagent_4567" self._haa_name = "high_availability_agent" self._haa_config = { "highavailability": { "policy": { "interval": 1, "name": "sensor", "parameters": { "metric": "app_attributes:ml", "sample_period": 600, "sample_function": "Average", "cooldown_period": 20, "scale_up_threshold": 2.0, "scale_up_n_processes": 1, "scale_down_threshold": 1.0, "scale_down_n_processes": 1, "maximum_processes": 5, "minimum_processes": 1, }, }, "aggregator": { "type": "trafficsentinel", "host": "localhost", "port": http_port, "protocol": "http", "username": "******", "password": "******", }, "process_definition_id": self.process_definition_id, "process_dispatchers": ["process_dispatcher"], }, "agent": {"resource_id": self.resource_id}, } self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process( name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config, ) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info("Got haa client %s.", str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) def tearDown(self): self.waiter.stop() self.container.terminate_process(self._haa_pid) self._stop_webserver() self._stop_container() def get_running_procs(self): """returns a normalized set of running procs (removes the ones that were there at setup time) """ base = self._base_procs base_pids = [proc.process_id for proc in base] current = self.pd_cli.list_processes() current_pids = [proc.process_id for proc in current] print "filtering base procs %s from %s" % (base_pids, current_pids) normal = [ cproc for cproc in current if cproc.process_id not in base_pids and cproc.process_state == ProcessStateEnum.RUNNING ] return normal def _get_managed_upids(self): result = self.haa_client.dump().result upids = result["managed_upids"] return upids def _set_response(self, response): self._webserver.response = response def test_sensor_policy(self): status = self.haa_client.status().result # Ensure HA hasn't already failed assert status in ("PENDING", "READY", "STEADY") self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 1) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, "STEADY") break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY" # Set ml for each proc such that we scale up upids = self._get_managed_upids() response = "" for upid in upids: response += "%s,ml=5\n" self._set_response(response) self.waiter.await_state_event(state=ProcessStateEnum.RUNNING) self.assertEqual(len(self.get_running_procs()), 2) # Set ml so we stay steady upids = self._get_managed_upids() response = "" for upid in upids: response += "%s,ml=1.5\n" self._set_response(response) self.assertEqual(len(self.get_running_procs()), 2) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, "STEADY") break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY" # Set ml so we scale down upids = self._get_managed_upids() response = "" for upid in upids: response += "%s,ml=0.5\n" self._set_response(response) self.waiter.await_state_event(state=ProcessStateEnum.TERMINATED) self.assertEqual(len(self.get_running_procs()), 1) for i in range(0, 5): status = self.haa_client.status().result try: self.assertEqual(status, "STEADY") break except: gevent.sleep(1) else: assert False, "HA Service took too long to get to state STEADY"
def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition = ProcessDefinition(name='test', executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) http_port = 8919 http_port = self._start_webserver(port=http_port) self.resource_id = "haagent_4567" self._haa_name = "high_availability_agent" self._haa_config = { 'server': { 'trafficsentinel': { 'host': 'localhost', 'port': http_port, 'protocol': 'http', 'username': '******', 'password': '******' } }, 'highavailability': { 'policy': { 'interval': 1, 'name': 'sensor', 'parameters': { 'metric': 'app_attributes:ml', 'sample_period': 600, 'sample_function': 'Average', 'cooldown_period': 5, 'scale_up_threshold': 2.0, 'scale_up_n_processes': 1, 'scale_down_threshold': 1.0, 'scale_down_n_processes': 1, 'maximum_processes': 5, 'minimum_processes': 1, } }, 'process_definition_id': self.process_definition_id, "process_dispatchers": [ 'process_dispatcher' ] }, 'agent': {'resource_id': self.resource_id}, } self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=self._haa_config) # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) log.info('Got haa client %s.', str(self._haa_pyon_client)) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)
class BaseHighAvailabilityAgentTest(IonIntegrationTestCase): @needs_epu def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2cei.yml') #self.pd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.pd_cli = ProcessDispatcherServiceClient(to_name="process_dispatcher") self.process_definition_id = uuid4().hex self.process_definition_name = 'test_haagent_%s' % self.process_definition_id self.process_definition = ProcessDefinition(name=self.process_definition_name, executable={ 'module': 'ion.agents.cei.test.test_haagent', 'class': 'TestProcess' }) self.pd_cli.create_process_definition(self.process_definition, self.process_definition_id) service_definition = SERVICE_DEFINITION_TMPL % self.process_definition_name sd = IonObject(RT.ServiceDefinition, {"name": self.process_definition_name, "definition": service_definition}) self.service_def_id, _ = self.container.resource_registry.create(sd) self.resource_id = "haagent_1234" self._haa_name = "high_availability_agent" self._haa_dashi_name = "dashi_haa_" + uuid4().hex self._haa_dashi_uri = get_dashi_uri_from_cfg() self._haa_dashi_exchange = "hatests" self._haa_config = self._get_haagent_config() self._base_services, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) self._base_procs = self.pd_cli.list_processes() self.waiter = ProcessStateWaiter() self.waiter.start() self.container_client = ContainerAgentClient(node=self.container.node, name=self.container.name) self._spawn_haagent() self._setup_haa_client() def _get_haagent_config(self): return { 'highavailability': { 'policy': { 'interval': 1, 'name': 'npreserving', 'parameters': { 'preserve_n': 0 } }, 'process_definition_id': self.process_definition_id, 'dashi_messaging': True, 'dashi_exchange': self._haa_dashi_exchange, 'dashi_name': self._haa_dashi_name }, 'agent': {'resource_id': self.resource_id}, } def _setup_haa_client(self): # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient(self.resource_id, process=FakeProcess()) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client) def _spawn_haagent(self, policy_parameters=None): config = deepcopy(self._haa_config) if policy_parameters is not None: config['highavailability']['policy']['parameters'] = policy_parameters self._haa_pid = self.container_client.spawn_process(name=self._haa_name, module="ion.agents.cei.high_availability_agent", cls="HighAvailabilityAgent", config=config) def _kill_haagent(self): self.container.terminate_process(self._haa_pid) def tearDown(self): new_policy = {'preserve_n': 0} self.haa_client.reconfigure_policy(new_policy) self.assertEqual(len(self.get_running_procs()), 0) self.await_ha_state('STEADY') self.waiter.stop() try: self._kill_haagent() except BadRequest: log.warning("Couldn't terminate HA Agent in teardown (May have been terminated by a test)") self.container.resource_registry.delete(self.service_def_id, del_associations=True) self._stop_container() def get_running_procs(self): """returns a normalized set of running procs (removes the ones that were there at setup time) """ base = self._base_procs base_pids = [proc.process_id for proc in base] current = self.pd_cli.list_processes() current_pids = [proc.process_id for proc in current] print "filtering base procs %s from %s" % (base_pids, current_pids) normal = [cproc for cproc in current if cproc.process_id not in base_pids and cproc.process_state == ProcessStateEnum.RUNNING] return normal def get_new_services(self): base = self._base_services base_names = [i.name for i in base] services_registered, _ = self.container.resource_registry.find_resources( restype="Service", name=self.process_definition_name) normal = [cserv for cserv in services_registered if cserv.name not in base_names] return normal def await_ha_state(self, want_state, timeout=20): for i in range(0, timeout): try: status = self.haa_client.status().result if status == want_state: return else: procs = self.get_running_procs() num_procs = len(procs) log.debug("assert wants state %s, got state %s, with %s procs" % (want_state,status, num_procs)) except Exception: log.exception("Problem getting HA status, trying again...") gevent.sleep(1) raise Exception("Took more than %s to get to ha state %s" % (timeout, want_state)) def await_pyon_ha_state(self, want_state, timeout=20): for i in range(0, timeout): try: result = self.haa_client.dump().result service_id = result.get('service_id') service = self.container.resource_registry.read(service_id) if service.state == want_state: return else: log.debug("want state %s, got state %s") % (want_state, service.state) except Exception: log.exception("Problem getting HA status, trying again...") gevent.sleep(1) raise Exception("Took more than %s to get to pyon ha state %s" % (timeout, want_state))
def _setup_haa_client(self): # Start a resource agent client to talk with the instrument agent. self._haa_pyon_client = SimpleResourceAgentClient( self.resource_id, process=FakeProcess()) self.haa_client = HighAvailabilityAgentClient(self._haa_pyon_client)