def test_default_restart_count(self):
    """Verify that a service without a restart count is started only once.

    ``web_server`` declares no ``restart_count`` in the manifest.  The first
    ``start_service`` call, made before any ``finished`` file exists, must
    succeed; after one exit has been recorded in that file the next call
    must be refused.
    """
    # safe_load: the manifest is plain data, and calling yaml.load()
    # without an explicit Loader is deprecated by PyYAML.
    manifest = yaml.safe_load("""
---
name: foo.test1
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh, port: 22, real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)

    # Nothing has exited yet, so the service may start.
    self.assertTrue(app_presence.start_service('web_server'))

    os.mkdir(os.path.join(self.root, 'services'))
    os.mkdir(os.path.join(self.root, 'services', 'web_server'))
    finished_file = os.path.join(self.root, 'services', 'web_server',
                                 'finished')

    # App will run once.
    with open(finished_file, 'a+') as f:
        f.write('1000 1 0\n')

    self.assertFalse(app_presence.start_service('web_server'))
def test_update_exit_status(self):
    """Verify that update_exit_status reports the latest finished entry.

    Each line appended to the service's ``finished`` file is followed by an
    ``update_exit_status`` call, which must post an ``exit`` event whose
    payload is ``<service>.<second field>.<third field>`` of that line.  A
    ``reported`` file must exist afterwards, and a further call with no new
    entry must not touch ``KazooClient.create``.
    """
    # safe_load: the manifest is plain data, and calling yaml.load()
    # without an explicit Loader is deprecated by PyYAML.
    manifest = yaml.safe_load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 3
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh, port: 22, real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
    treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)

    os.mkdir(os.path.join(self.root, 'services'))
    os.mkdir(os.path.join(self.root, 'services', 'web_server'))
    finished_file = os.path.join(self.root, 'services', 'web_server',
                                 'finished')

    # First exit record.
    with open(finished_file, 'a+') as f:
        f.write('1000 1 0\n')
    app_presence.update_exit_status('web_server')
    treadmill.appevents.post.assert_called_with(
        self.events_dir,
        'foo.test1#0001',
        'exit',
        'web_server.1.0',
    )

    # Second exit record: only the newest line is reported.
    kazoo.client.KazooClient.create.reset_mock()
    with open(finished_file, 'a+') as f:
        f.write('2000 9 255\n')
    app_presence.update_exit_status('web_server')
    treadmill.appevents.post.assert_called_with(
        self.events_dir,
        'foo.test1#0001',
        'exit',
        'web_server.9.255',
    )

    reported_file = os.path.join(self.root, 'services', 'web_server',
                                 'reported')
    self.assertTrue(os.path.exists(reported_file))

    # Calling update state twice is no-op, as reported file is newer.
    kazoo.client.KazooClient.create.reset_mock()
    app_presence.update_exit_status('web_server')
    self.assertFalse(kazoo.client.KazooClient.create.called)
def test_report_running(self):
    """Check that report_running posts a ServiceRunningTraceEvent.

    The same event is expected whether ``KazooClient.exists`` reports
    ``False`` or ``True``.
    """
    web_server = {
        'command': '/usr/bin/python -m SimpleHTTPServer',
        'name': 'web_server',
        'restart': {'interval': 60, 'limit': 3},
    }
    sshd = {
        'command': 'sshd -D -f /etc/ssh/sshd_config',
        'name': 'sshd',
        'proid': None,
    }
    manifest = {
        'vip': {'ip0': '192.168.0.1', 'ip1': '192.168.0.2'},
        'task': 't-0001',
        'name': 'foo.test1#0001',
        'uniqueid': 'AAAAAA',
        'proid': 'andreik',
        'services': [web_server, sshd],
        'endpoints': [
            {'port': 22, 'name': 'ssh', 'real_port': 5001},
            {'port': 8000, 'name': 'http', 'real_port': 5000},
        ],
    }
    treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)

    # First pass with exists() returning False, second with True.
    for node_exists in (False, True):
        kazoo.client.KazooClient.exists.return_value = node_exists
        app_presence.report_running('web_server')
        treadmill.appevents.post.assert_called_with(
            self.events_dir,
            events.ServiceRunningTraceEvent(
                instanceid='foo.test1#0001',
                uniqueid='AAAAAA',
                service='web_server'
            )
        )
def monitor(manifest, container_dir, appevents_dir):
    """Monitor container services.

    Loads the application manifest, then starts the container's services
    through ``presence.ServicePresence`` and waits on them until a service
    can no longer be started or the services supervisor is found down.
    Afterwards the exit is recorded with ``exit_app`` and
    ``s6-svscanctl -pi`` is invoked on the ``sys`` directory.

    :param manifest: Readable file-like object holding the YAML manifest;
        the loaded document must contain a ``name`` key.
    :param container_dir: Container root directory; supervisor state is
        looked up under ``<container_dir>/sys/start_container``.
    :param appevents_dir: Passed through to ``presence.ServicePresence``.
    """
    # NOTE(review): yaml.load() without an explicit Loader can construct
    # arbitrary Python objects and is deprecated by PyYAML. If manifests are
    # plain data, yaml.safe_load() would be the safer call -- confirm.
    app = yaml.load(manifest.read())
    with lc.LogContext(_LOGGER, app['name'], lc.ContainerAdapter) as log:
        svc_presence = presence.ServicePresence(
            app,
            container_dir,
            appevents_dir,
        )

        sys_dir = os.path.join(container_dir, 'sys')
        svc_sup_dir = os.path.join(sys_dir, 'start_container')

        # Defaults passed to exit_app() when the start loop is skipped.
        failed_svc = None
        killed = False

        # Check that start_container was not terminated. This fixes a race
        # condition where presence exits and, while it is being restarted,
        # start_container is terminated.
        svc_sup_ran_once = os.path.exists(
            os.path.join(svc_sup_dir, 'self.pid'))
        log.info('services supervisor ran once: %s', svc_sup_ran_once)
        svc_sup_down = presence.is_down(svc_sup_dir)
        log.info('services supervisor down: %s', svc_sup_down)
        if svc_sup_down and svc_sup_ran_once:
            log.info('services supervisor was terminated, exiting.')
        else:
            svc_presence.ensure_supervisors_running()

            # Try to start the service, taking into account number of
            # restarts.
            # If the number of restarts is more than specified, delete app
            # from the model, which will trigger container shutdown.
            #
            # In case of container shutdown (application evicted from the
            # server), exit_app will not be called.
            while True:
                success, failed_svc = svc_presence.start_all()
                if not success:
                    # start_all() reported a failure; failed_svc is the
                    # second element of its return value.
                    break

                svc_presence.wait_for_exit(svc_sup_dir)
                if presence.is_down(svc_sup_dir):
                    # The supervisor went down: report the exit as a kill
                    # and do not name a failed service.
                    log.info('Container services supervisor is down.')
                    failed_svc = None
                    killed = True
                    break

        svc_presence.exit_app(failed_svc, killed=killed)

        log.info('Shutting down sys supervisor.')
        subproc.call(['s6-svscanctl', '-pi', sys_dir])
def test_exit_info(self):
    """Verify exit_info parsing of the finished file and the is_oom check.

    ``exit_info`` is expected to return a ``(info, count)`` pair where
    ``count`` is the number of recorded exits and ``info`` carries the
    ``rc``, ``sig`` and ``oom`` values of the most recent one.
    ``presence.is_oom`` must return true when the file it opens reads
    ``'1'``.
    """
    # safe_load: the manifest is plain data, and calling yaml.load()
    # without an explicit Loader is deprecated by PyYAML.
    manifest = yaml.safe_load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 10000
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh, port: 22, real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")

    os.mkdir(os.path.join(self.root, 'services'))
    os.mkdir(os.path.join(self.root, 'services', 'web_server'))
    finished_file = os.path.join(self.root, 'services', 'web_server',
                                 'finished')

    with open(finished_file, 'a+') as f:
        f.write('1000 1 0\n')
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)
    ws_svc_dir = os.path.join(self.root, 'services', 'web_server')

    # assertEqual replaces the deprecated assertEquals alias.
    einfo, count = app_presence.exit_info(ws_svc_dir)
    self.assertEqual(1, count)
    self.assertEqual(1, einfo['rc'])
    self.assertEqual(0, einfo['sig'])
    self.assertFalse(einfo['oom'])

    with open(finished_file, 'a+') as f:
        f.write('1001 255 9\n')
    einfo, count = app_presence.exit_info(ws_svc_dir)
    self.assertEqual(2, count)
    self.assertEqual(255, einfo['rc'])
    self.assertEqual(9, einfo['sig'])
    self.assertFalse(einfo['oom'])

    # '__builtin__' and 'file' are Python 2 names.
    open_name = '__builtin__.open'
    with mock.patch(open_name, mock.mock_open()) as mock_open:
        file_mock = mock.MagicMock(spec=file)
        file_mock.__enter__.return_value.read.return_value = '1'
        mock_open.return_value = file_mock
        self.assertTrue(presence.is_oom())
def test_app_exit(self):
    """Verify the exitinfo file written by exit_app.

    With a ``last_exit`` entry recorded for the service, ``exitinfo`` must
    contain its ``rc`` and ``sig`` alongside ``service``, ``killed`` and
    ``oom``.  Without ``last_exit`` only the latter three keys are
    expected.
    """
    # safe_load: the manifest is plain data, and calling yaml.load()
    # without an explicit Loader is deprecated by PyYAML.
    manifest = yaml.safe_load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 3
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh, port: 22, real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
    services_dir = os.path.join(self.root, 'services')
    os.mkdir(services_dir)

    treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)
    exitinfo_file = os.path.join(self.root, 'exitinfo')

    def read_exitinfo():
        # Close the handle deterministically instead of leaking it.
        # NOTE(review): yaml.load is kept here because the file is produced
        # by the code under test and its dump format is not visible in this
        # test -- switch to safe_load once confirmed to be plain YAML.
        with open(exitinfo_file) as f:
            return yaml.load(f.read())

    app_presence.services['web_server']['last_exit'] = {
        'rc': 1,
        'sig': 3,
    }
    app_presence.exit_app('web_server')

    self.assertTrue(os.path.exists(exitinfo_file))
    self.assertEqual(
        read_exitinfo(),
        {'rc': 1,
         'sig': 3,
         'service': 'web_server',
         'killed': False,
         'oom': False}
    )

    del app_presence.services['web_server']['last_exit']
    app_presence.exit_app('web_server')
    self.assertTrue(os.path.exists(exitinfo_file))
    self.assertEqual(
        read_exitinfo(),
        {'service': 'web_server',
         'killed': False,
         'oom': False}
    )
def test_restart_rate(self):
    """Verify that start_service takes the timing of past exits into account.

    With the clock mocked, five exits recorded within the last minute must
    make ``start_service`` refuse to start ``web_server``; after a second
    batch of records and a later clock value the service must be allowed
    to start again.
    """
    # safe_load: the manifest is plain data, and calling yaml.load()
    # without an explicit Loader is deprecated by PyYAML.
    manifest = yaml.safe_load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 10000
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh, port: 22, real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
    treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)

    os.mkdir(os.path.join(self.root, 'services'))
    os.mkdir(os.path.join(self.root, 'services', 'web_server'))
    finished_file = os.path.join(self.root, 'services', 'web_server',
                                 'finished')

    time.time.return_value = 1059
    # Five restarts in less than 60 sec.
    with open(finished_file, 'a+') as f:
        f.write('1000 1 0\n')
        f.write('1001 1 0\n')
        f.write('1002 1 0\n')
        f.write('1003 1 0\n')
        f.write('1059 1 0\n')

    self.assertFalse(app_presence.start_service('web_server'))

    # Fifth restart is 100 sec away.
    time.time.return_value = 1105
    # NOTE(review): mode 'a+' appends, so the file now also holds the five
    # entries written above -- confirm that this is intended.
    with open(finished_file, 'a+') as f:
        f.write('1000 1 0\n')
        f.write('1100 1 0\n')
        f.write('1102 1 0\n')
        f.write('1103 1 0\n')
        f.write('1104 1 0\n')

    self.assertTrue(app_presence.start_service('web_server'))
def test_report_running(self):
    """Verify that report_running posts a 'running' event for the service.

    The same event is expected whether ``KazooClient.exists`` returns
    ``False`` or ``True``.
    """
    # safe_load: the manifest is plain data, and calling yaml.load()
    # without an explicit Loader is deprecated by PyYAML.
    manifest = yaml.safe_load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 3
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh, port: 22, real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
    treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)

    kazoo.client.KazooClient.exists.return_value = False
    app_presence.report_running('web_server')
    treadmill.appevents.post.assert_called_with(self.events_dir,
                                                'foo.test1#0001',
                                                'running',
                                                'web_server')

    kazoo.client.KazooClient.exists.return_value = True
    app_presence.report_running('web_server')
    treadmill.appevents.post.assert_called_with(self.events_dir,
                                                'foo.test1#0001',
                                                'running',
                                                'web_server')
def test_exit_info(self):
    """Verify exit_info parsing of the finished file and the is_oom check.

    ``exit_info`` is expected to return a ``(info, count)`` pair where
    ``count`` is the number of recorded exits and ``info`` carries the
    ``rc``, ``sig`` and ``oom`` values of the most recent one.
    ``presence.is_oom`` must return true when the file it opens reads
    ``'1'``.
    """
    manifest = {
        'vip': {
            'ip0': '192.168.0.1',
            'ip1': '192.168.0.2'
        },
        'task': 't-0001',
        'name': 'foo.test1#0001',
        'uniqueid': 'AAAAAA',
        'proid': 'andreik',
        'services': [
            {
                'command': '/usr/bin/python -m SimpleHTTPServer',
                'name': 'web_server',
                'restart': {
                    'interval': 60,
                    'limit': 3
                }
            },
            {
                'command': 'sshd -D -f /etc/ssh/sshd_config',
                'name': 'sshd',
                'proid': None
            }
        ],
        'endpoints': [
            {
                'port': 22,
                'name': 'ssh',
                'real_port': 5001
            },
            {
                'port': 8000,
                'name': 'http',
                'real_port': 5000
            }
        ]
    }

    os.mkdir(os.path.join(self.root, 'services'))
    os.mkdir(os.path.join(self.root, 'services', 'web_server'))
    finished_file = os.path.join(self.root, 'services', 'web_server',
                                 'finished')

    with open(finished_file, 'a+') as f:
        f.write('1000 1 0\n')
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)
    ws_svc_dir = os.path.join(self.root, 'services', 'web_server')

    # assertEqual replaces the deprecated assertEquals alias.
    einfo, count = app_presence.exit_info(ws_svc_dir)
    self.assertEqual(1, count)
    self.assertEqual(1, einfo['rc'])
    self.assertEqual(0, einfo['sig'])
    self.assertFalse(einfo['oom'])

    with open(finished_file, 'a+') as f:
        f.write('1001 255 9\n')
    einfo, count = app_presence.exit_info(ws_svc_dir)
    self.assertEqual(2, count)
    self.assertEqual(255, einfo['rc'])
    self.assertEqual(9, einfo['sig'])
    self.assertFalse(einfo['oom'])

    # '__builtin__' and 'file' are Python 2 names.
    open_name = '__builtin__.open'
    with mock.patch(open_name, mock.mock_open()) as mock_open:
        file_mock = mock.MagicMock(spec=file)
        file_mock.__enter__.return_value.read.return_value = '1'
        mock_open.return_value = file_mock
        self.assertTrue(presence.is_oom())
def test_restart_rate(self):
    """Check start_service against the service's restart limit and interval.

    The clock is mocked and the ``finished`` file is rewritten before each
    check, so every scenario sees only its own exit records.
    """
    web_server = {
        'command': '/usr/bin/python -m SimpleHTTPServer',
        'name': 'web_server',
        'restart': {'interval': 60, 'limit': 5},
    }
    manifest = {
        'task': 't-0001',
        'name': 'foo.test1#0001',
        'uniqueid': 'AAAAAA',
        'proid': 'andreik',
        'services': [web_server],
    }
    treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)

    os.mkdir(os.path.join(self.root, 'services'))
    os.mkdir(os.path.join(self.root, 'services', 'web_server'))
    finished_file = os.path.join(self.root, 'services', 'web_server',
                                 'finished')

    def record_exits(stamps):
        # Replace the finished file with one '<stamp> 1 0' line per entry.
        with open(finished_file, 'w') as out:
            for stamp in stamps:
                out.write('%d 1 0\n' % stamp)

    # Five restarts in less than 60 sec, service should not be restarted
    time.time.return_value = 1059
    record_exits([1000, 1001, 1002, 1003, 1059])
    self.assertFalse(app_presence.start_service('web_server'))

    # Fifth restart is 105 sec away, service should be restarted
    time.time.return_value = 1105
    record_exits([1000, 1101, 1102, 1103, 1104])
    self.assertTrue(app_presence.start_service('web_server'))

    # Last restart in more than 30 sec, should be restarted
    time.time.return_value = 2000
    manifest['services'][0]['restart'] = {'limit': 1, 'interval': 30}
    record_exits([1000, 1950])
    self.assertTrue(app_presence.start_service('web_server'))

    # Last restart in less than 30 sec, should be *not* restarted
    record_exits([1000, 1001, 1980])
    self.assertFalse(app_presence.start_service('web_server'))

    # Confirm that limit: 0 does not allow *any* exit
    manifest['services'][0]['restart'] = {'limit': 0, 'interval': 60}
    time.time.return_value = 2000
    record_exits([1000, 1001, 1002, 1003, 1004])
    self.assertFalse(app_presence.start_service('web_server'))
def test_update_exit_status(self):
    """Check that update_exit_status posts a ServiceExitedTraceEvent.

    After each record appended to the ``finished`` file the posted event
    must carry that record's return code and signal.  A ``reported`` file
    must exist afterwards, and a further call with no new record must not
    touch ``KazooClient.create``.
    """
    web_server = {
        'command': '/usr/bin/python -m SimpleHTTPServer',
        'name': 'web_server',
        'restart': {'interval': 60, 'limit': 3},
    }
    sshd = {
        'command': 'sshd -D -f /etc/ssh/sshd_config',
        'name': 'sshd',
        'proid': None,
    }
    manifest = {
        'vip': {'ip0': '192.168.0.1', 'ip1': '192.168.0.2'},
        'task': 't-0001',
        'name': 'foo.test1#0001',
        'uniqueid': 'AAAAAA',
        'proid': 'andreik',
        'services': [web_server, sshd],
        'endpoints': [
            {'port': 22, 'name': 'ssh', 'real_port': 5001},
            {'port': 8000, 'name': 'http', 'real_port': 5000},
        ],
    }
    treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)

    os.mkdir(os.path.join(self.root, 'services'))
    os.mkdir(os.path.join(self.root, 'services', 'web_server'))
    finished_file = os.path.join(self.root, 'services', 'web_server',
                                 'finished')

    def exit_and_update(record):
        # Append one exit record, then let presence process it.
        with open(finished_file, 'a+') as out:
            out.write(record)
        app_presence.update_exit_status('web_server')

    exit_and_update('1000 1 0\n')
    treadmill.appevents.post.assert_called_with(
        self.events_dir,
        events.ServiceExitedTraceEvent(
            instanceid='foo.test1#0001',
            uniqueid='AAAAAA',
            service='web_server',
            rc=1,
            signal=0
        )
    )

    kazoo.client.KazooClient.create.reset_mock()
    exit_and_update('2000 9 255\n')
    treadmill.appevents.post.assert_called_with(
        self.events_dir,
        events.ServiceExitedTraceEvent(
            instanceid='foo.test1#0001',
            uniqueid='AAAAAA',
            service='web_server',
            rc=9,
            signal=255
        )
    )

    reported_file = os.path.join(self.root, 'services', 'web_server',
                                 'reported')
    self.assertTrue(os.path.exists(reported_file))

    # Calling update state twice is no-op, as reported file is newer.
    kazoo.client.KazooClient.create.reset_mock()
    app_presence.update_exit_status('web_server')
    self.assertFalse(kazoo.client.KazooClient.create.called)
def test_app_exit(self):
    """Verify the exitinfo file written by exit_app.

    With a ``last_exit`` entry recorded for the service, ``exitinfo`` must
    contain its ``rc`` and ``sig`` alongside ``service``, ``killed`` and
    ``oom``.  Without ``last_exit`` only the latter three keys are
    expected.
    """
    manifest = {
        'vip': {
            'ip0': '192.168.0.1',
            'ip1': '192.168.0.2'
        },
        'task': 't-0001',
        'name': 'foo.test1#0001',
        'uniqueid': 'AAAAAA',
        'proid': 'andreik',
        'services': [
            {
                'command': '/usr/bin/python -m SimpleHTTPServer',
                'name': 'web_server',
                'restart': {
                    'interval': 60,
                    'limit': 3
                }
            },
            {
                'command': 'sshd -D -f /etc/ssh/sshd_config',
                'name': 'sshd',
                'proid': None
            }
        ],
        'endpoints': [
            {
                'port': 22,
                'name': 'ssh',
                'real_port': 5001
            },
            {
                'port': 8000,
                'name': 'http',
                'real_port': 5000
            }
        ]
    }
    services_dir = os.path.join(self.root, 'services')
    os.mkdir(services_dir)

    treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)
    exitinfo_file = os.path.join(self.root, 'exitinfo')

    def read_exitinfo():
        # Close the handle deterministically instead of leaking it.
        with open(exitinfo_file) as f:
            return yaml.load(f.read())

    app_presence.services['web_server']['last_exit'] = {
        'rc': 1,
        'sig': 3,
    }
    app_presence.exit_app('web_server')

    self.assertTrue(os.path.exists(exitinfo_file))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        read_exitinfo(),
        {'rc': 1,
         'sig': 3,
         'service': 'web_server',
         'killed': False,
         'oom': False}
    )

    del app_presence.services['web_server']['last_exit']
    app_presence.exit_app('web_server')
    self.assertTrue(os.path.exists(exitinfo_file))
    self.assertEqual(
        read_exitinfo(),
        {'service': 'web_server',
         'killed': False,
         'oom': False}
    )
def test_start_service(self):
    """Check that start_service keeps succeeding for widely spaced exits.

    The service is started once before any ``finished`` file exists, then
    once more after each of four recorded exits.  Every exit is stamped one
    second before the mocked clock and 1000 seconds after the previous
    one, against a restart interval of 60.
    """
    restart_policy = {'interval': 60, 'limit': 3}
    manifest = {
        'vip': {'ip0': '192.168.0.1', 'ip1': '192.168.0.2'},
        'task': 't-0001',
        'name': 'foo.test1',
        'uniqueid': 'AAAAAA',
        'proid': 'andreik',
        'services': [
            {
                'command': '/usr/bin/python -m SimpleHTTPServer',
                'name': 'web_server',
                'restart': dict(restart_policy),
            },
            {
                'command': 'sshd -D -f /etc/ssh/sshd_config',
                'name': 'sshd',
                'restart': dict(restart_policy),
                'proid': None,
            },
        ],
        'endpoints': [
            {'port': 22, 'name': 'ssh', 'real_port': 5001},
            {'port': 8000, 'name': 'http', 'real_port': 5000},
        ],
    }
    app_presence = presence.ServicePresence(manifest,
                                            container_dir=self.root,
                                            appevents_dir=self.events_dir)

    self.assertTrue(app_presence.start_service('web_server'))

    os.mkdir(os.path.join(self.root, 'services'))
    os.mkdir(os.path.join(self.root, 'services', 'web_server'))
    finished_file = os.path.join(self.root, 'services', 'web_server',
                                 'finished')

    # App will be restarted, since it exits outside of its interval.
    for exited_at in (1000, 2000, 3000, 4000):
        time.time.return_value = exited_at + 1
        with open(finished_file, 'a+') as out:
            out.write('%d 1 0\n' % exited_at)
        self.assertTrue(app_presence.start_service('web_server'))