Beispiel #1
0
    def test_default_restart_count(self):
        """Verifies restart/finish file interaction."""
        manifest = yaml.load("""
---
name: foo.test1
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh,  port: 22,   real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
        app_presence = presence.ServicePresence(manifest,
                                                container_dir=self.root,
                                                appevents_dir=self.events_dir)

        self.assertTrue(app_presence.start_service('web_server'))
        os.mkdir(os.path.join(self.root, 'services'))
        os.mkdir(os.path.join(self.root, 'services', 'web_server'))
        finished_file = os.path.join(self.root, 'services', 'web_server',
                                     'finished')
        # App will run once.
        with open(finished_file, 'a+') as f:
            f.write('1000 1 0\n')
        self.assertFalse(app_presence.start_service('web_server'))
Beispiel #2
0
    def test_update_exit_status(self):
        """Verifies reading the finished file and updating task status."""
        manifest = yaml.load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 3
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh,  port: 22,   real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
        treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
        app_presence = presence.ServicePresence(manifest,
                                                container_dir=self.root,
                                                appevents_dir=self.events_dir)

        os.mkdir(os.path.join(self.root, 'services'))
        os.mkdir(os.path.join(self.root, 'services', 'web_server'))
        finished_file = os.path.join(self.root, 'services', 'web_server',
                                     'finished')
        with open(finished_file, 'a+') as f:
            f.write('1000 1 0\n')
        app_presence.update_exit_status('web_server')
        treadmill.appevents.post.assert_called_with(
            self.events_dir,
            'foo.test1#0001',
            'exit',
            'web_server.1.0',
        )

        kazoo.client.KazooClient.create.reset_mock()
        with open(finished_file, 'a+') as f:
            f.write('2000 9 255\n')
        app_presence.update_exit_status('web_server')
        treadmill.appevents.post.assert_called_with(
            self.events_dir,
            'foo.test1#0001',
            'exit',
            'web_server.9.255',
        )

        reported_file = os.path.join(self.root, 'services', 'web_server',
                                     'reported')
        self.assertTrue(os.path.exists(reported_file))

        # Calling update state twice is no-op, as reported file is newer.
        kazoo.client.KazooClient.create.reset_mock()
        app_presence.update_exit_status('web_server')
        self.assertFalse(kazoo.client.KazooClient.create.called)
Beispiel #3
0
    def test_report_running(self):
        """Checks that report_running posts a ServiceRunningTraceEvent.

        The same event is expected whether the mocked KazooClient.exists
        answers False or True.
        """
        vip = {
            'ip0': '192.168.0.1',
            'ip1': '192.168.0.2',
        }
        web_server = {
            'command': '/usr/bin/python -m SimpleHTTPServer',
            'name': 'web_server',
            'restart': {
                'interval': 60,
                'limit': 3,
            },
        }
        sshd = {
            'command': 'sshd -D -f /etc/ssh/sshd_config',
            'name': 'sshd',
            'proid': None,
        }
        endpoints = [
            {'port': 22, 'name': 'ssh', 'real_port': 5001},
            {'port': 8000, 'name': 'http', 'real_port': 5000},
        ]
        manifest = {
            'vip': vip,
            'task': 't-0001',
            'name': 'foo.test1#0001',
            'uniqueid': 'AAAAAA',
            'proid': 'andreik',
            'services': [web_server, sshd],
            'endpoints': endpoints,
        }

        treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
        app_presence = presence.ServicePresence(
            manifest,
            container_dir=self.root,
            appevents_dir=self.events_dir,
        )

        # Run the identical scenario for both answers of the exists() mock.
        for node_exists in (False, True):
            kazoo.client.KazooClient.exists.return_value = node_exists
            app_presence.report_running('web_server')
            # Build the expected event after the call, as a fresh object.
            expected_event = events.ServiceRunningTraceEvent(
                instanceid='foo.test1#0001',
                uniqueid='AAAAAA',
                service='web_server',
            )
            treadmill.appevents.post.assert_called_with(
                self.events_dir,
                expected_event,
            )
Beispiel #4
0
    def monitor(manifest, container_dir, appevents_dir):
        """Monitor container services.

        Reads the application manifest from the ``manifest`` file object,
        keeps (re)starting the services through a ``ServicePresence`` until a
        start fails or the ``start_container`` supervisor is found down, then
        records the exit via ``exit_app`` and signals the supervisor tree
        rooted at ``<container_dir>/sys`` with ``s6-svscanctl -pi``.
        """
        # NOTE(review): yaml.load() without an explicit Loader can build
        # arbitrary objects and is deprecated by PyYAML -- confirm the
        # manifest is trusted, or switch to yaml.safe_load.
        app = yaml.load(manifest.read())
        with lc.LogContext(_LOGGER, app['name'], lc.ContainerAdapter) as log:
            svc_presence = presence.ServicePresence(
                app, container_dir, appevents_dir
            )

            sys_dir = os.path.join(container_dir, 'sys')
            svc_sup_dir = os.path.join(sys_dir, 'start_container')
            pid_file = os.path.join(svc_sup_dir, 'self.pid')

            failed_svc, killed = None, False

            # Guard against a race: if presence exited and, while it was
            # being restarted, start_container got terminated, the services
            # must not be started again.
            ran_once = os.path.exists(pid_file)
            log.info('services supervisor ran once: %s', ran_once)
            sup_down = presence.is_down(svc_sup_dir)
            log.info('services supervisor down: %s', sup_down)

            if not (sup_down and ran_once):
                svc_presence.ensure_supervisors_running()

                # Keep starting the services, honouring their restart
                # limits. Once a service exceeds its limit the loop ends and
                # the app is removed from the model, which triggers container
                # shutdown.
                #
                # When the container itself is shut down (application evicted
                # from the server), exit_app will not be called.
                while True:
                    started, failed_svc = svc_presence.start_all()
                    if not started:
                        break

                    svc_presence.wait_for_exit(svc_sup_dir)
                    if not presence.is_down(svc_sup_dir):
                        continue

                    log.info('Container services supervisor is down.')
                    failed_svc = None
                    killed = True
                    break
            else:
                log.info('services supervisor was terminated, exiting.')

            svc_presence.exit_app(failed_svc, killed=killed)

            log.info('Shutting down sys supervisor.')
            subproc.call(['s6-svscanctl', '-pi', sys_dir])
Beispiel #5
0
    def test_exit_info(self):
        """Tests collection of exit info."""
        manifest = yaml.load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 10000
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh,  port: 22,   real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")

        os.mkdir(os.path.join(self.root, 'services'))
        os.mkdir(os.path.join(self.root, 'services', 'web_server'))
        finished_file = os.path.join(self.root, 'services', 'web_server',
                                     'finished')

        with open(finished_file, 'a+') as f:
            f.write('1000 1 0\n')
        app_presence = presence.ServicePresence(manifest,
                                                container_dir=self.root,
                                                appevents_dir=self.events_dir)
        ws_svc_dir = os.path.join(self.root, 'services', 'web_server')
        einfo, count = app_presence.exit_info(ws_svc_dir)
        self.assertEquals(1, count)
        self.assertEquals(1, einfo['rc'])
        self.assertEquals(0, einfo['sig'])
        self.assertFalse(einfo['oom'])

        with open(finished_file, 'a+') as f:
            f.write('1001 255 9\n')
        einfo, count = app_presence.exit_info(ws_svc_dir)
        self.assertEquals(2, count)
        self.assertEquals(255, einfo['rc'])
        self.assertEquals(9, einfo['sig'])
        self.assertFalse(einfo['oom'])

        open_name = '__builtin__.open'
        with mock.patch(open_name, mock.mock_open()) as mock_open:
            file_mock = mock.MagicMock(spec=file)
            file_mock.__enter__.return_value.read.return_value = '1'
            mock_open.return_value = file_mock
            self.assertTrue(presence.is_oom())
Beispiel #6
0
    def test_app_exit(self):
        """Verifies app deletion on service exit."""
        manifest = yaml.load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 3
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh,  port: 22,   real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
        services_dir = os.path.join(self.root, 'services')
        os.mkdir(services_dir)

        treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
        app_presence = presence.ServicePresence(manifest,
                                                container_dir=self.root,
                                                appevents_dir=self.events_dir)
        app_presence.services['web_server']['last_exit'] = {
            'rc': 1,
            'sig': 3,
        }
        app_presence.exit_app('web_server')

        self.assertTrue(os.path.exists(os.path.join(self.root, 'exitinfo')))
        self.assertEquals(
            yaml.load(open(os.path.join(self.root, 'exitinfo')).read()), {
                'rc': 1,
                'sig': 3,
                'service': 'web_server',
                'killed': False,
                'oom': False
            })

        del app_presence.services['web_server']['last_exit']
        app_presence.exit_app('web_server')
        self.assertTrue(os.path.exists(os.path.join(self.root, 'exitinfo')))
        self.assertEquals(
            yaml.load(open(os.path.join(self.root, 'exitinfo')).read()), {
                'service': 'web_server',
                'killed': False,
                'oom': False
            })
Beispiel #7
0
    def test_restart_rate(self):
        """Verifies reading the finished file and updating task status."""
        manifest = yaml.load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 10000
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh,  port: 22,   real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
        treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
        app_presence = presence.ServicePresence(manifest,
                                                container_dir=self.root,
                                                appevents_dir=self.events_dir)

        os.mkdir(os.path.join(self.root, 'services'))
        os.mkdir(os.path.join(self.root, 'services', 'web_server'))
        finished_file = os.path.join(self.root, 'services', 'web_server',
                                     'finished')

        time.time.return_value = 1059
        # Five restarts in less than 60 sec.
        with open(finished_file, 'a+') as f:
            f.write('1000 1 0\n')
            f.write('1001 1 0\n')
            f.write('1002 1 0\n')
            f.write('1003 1 0\n')
            f.write('1059 1 0\n')

        self.assertFalse(app_presence.start_service('web_server'))

        # Fifth restart is 100 sec away.
        time.time.return_value = 1105
        with open(finished_file, 'a+') as f:
            f.write('1000 1 0\n')
            f.write('1100 1 0\n')
            f.write('1102 1 0\n')
            f.write('1103 1 0\n')
            f.write('1104 1 0\n')

        self.assertTrue(app_presence.start_service('web_server'))
Beispiel #8
0
    def test_report_running(self):
        """Verifies report running sequence."""
        manifest = yaml.load("""
---
name: foo.test1#0001
proid: andreik
services:
- command: /usr/bin/python -m SimpleHTTPServer
  name: web_server
  restart_count: 3
- command: sshd -D -f /etc/ssh/sshd_config
  endpoints:
  name: sshd
  proid: ~
endpoints:
- {name: ssh,  port: 22,   real_port: 5001}
- {name: http, port: 8000, real_port: 5000}
vip: {ip0: 192.168.0.1, ip1: 192.168.0.2}
task: t-0001
""")
        treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
        app_presence = presence.ServicePresence(manifest,
                                                container_dir=self.root,
                                                appevents_dir=self.events_dir)

        kazoo.client.KazooClient.exists.return_value = False
        app_presence.report_running('web_server')
        treadmill.appevents.post.assert_called_with(self.events_dir,
                                                    'foo.test1#0001',
                                                    'running', 'web_server')

        kazoo.client.KazooClient.exists.return_value = True
        app_presence.report_running('web_server')
        treadmill.appevents.post.assert_called_with(self.events_dir,
                                                    'foo.test1#0001',
                                                    'running', 'web_server')
Beispiel #9
0
    def test_exit_info(self):
        """Tests collection of exit info.

        ``exit_info`` is expected to return a pair ``(info, count)`` where
        ``count`` grows with the number of records in the ``finished`` file
        and ``info`` carries the ``rc``/``sig``/``oom`` values of the newest
        record. The last part checks ``presence.is_oom()`` against a patched
        ``open`` whose file content reads as '1'.
        """
        manifest = {
            'vip': {
                'ip0': '192.168.0.1',
                'ip1': '192.168.0.2'
            },
            'task':
            't-0001',
            'name':
            'foo.test1#0001',
            'uniqueid':
            'AAAAAA',
            'proid':
            'andreik',
            'services': [{
                'command': '/usr/bin/python -m SimpleHTTPServer',
                'name': 'web_server',
                'restart': {
                    'interval': 60,
                    'limit': 3
                }
            }, {
                'command': 'sshd -D -f /etc/ssh/sshd_config',
                'name': 'sshd',
                'proid': None
            }],
            'endpoints': [{
                'port': 22,
                'name': 'ssh',
                'real_port': 5001
            }, {
                'port': 8000,
                'name': 'http',
                'real_port': 5000
            }]
        }

        os.mkdir(os.path.join(self.root, 'services'))
        os.mkdir(os.path.join(self.root, 'services', 'web_server'))
        finished_file = os.path.join(self.root, 'services', 'web_server',
                                     'finished')

        with open(finished_file, 'a+') as f:
            f.write('1000 1 0\n')
        app_presence = presence.ServicePresence(manifest,
                                                container_dir=self.root,
                                                appevents_dir=self.events_dir)
        ws_svc_dir = os.path.join(self.root, 'services', 'web_server')
        einfo, count = app_presence.exit_info(ws_svc_dir)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(1, count)
        self.assertEqual(1, einfo['rc'])
        self.assertEqual(0, einfo['sig'])
        self.assertFalse(einfo['oom'])

        # A second record: the returned info must now describe this one.
        with open(finished_file, 'a+') as f:
            f.write('1001 255 9\n')
        einfo, count = app_presence.exit_info(ws_svc_dir)
        self.assertEqual(2, count)
        self.assertEqual(255, einfo['rc'])
        self.assertEqual(9, einfo['sig'])
        self.assertFalse(einfo['oom'])

        # NOTE(review): '__builtin__' and the 'file' builtin exist only on
        # Python 2; this part needs porting before it can run on Python 3.
        open_name = '__builtin__.open'
        with mock.patch(open_name, mock.mock_open()) as mock_open:
            file_mock = mock.MagicMock(spec=file)
            file_mock.__enter__.return_value.read.return_value = '1'
            mock_open.return_value = file_mock
            self.assertTrue(presence.is_oom())
Beispiel #10
0
    def test_restart_rate(self):
        """Exercises start_service against several restart limit/interval setups.

        Each scenario rewrites the ``finished`` file with a known exit
        history, sets the (mocked) clock and checks whether the service is
        allowed to start.
        """
        web_server = {
            'command': '/usr/bin/python -m SimpleHTTPServer',
            'name': 'web_server',
            'restart': {
                'interval': 60,
                'limit': 5,
            },
        }
        manifest = {
            'task': 't-0001',
            'name': 'foo.test1#0001',
            'uniqueid': 'AAAAAA',
            'proid': 'andreik',
            'services': [web_server],
        }
        treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
        app_presence = presence.ServicePresence(
            manifest,
            container_dir=self.root,
            appevents_dir=self.events_dir,
        )

        services_dir = os.path.join(self.root, 'services')
        svc_dir = os.path.join(services_dir, 'web_server')
        os.mkdir(services_dir)
        os.mkdir(svc_dir)
        finished_file = os.path.join(svc_dir, 'finished')

        def rewrite_finished(*records):
            """Replace the finished file content with the given records."""
            with open(finished_file, 'w') as f:
                f.writelines(records)

        # Five restarts in less than 60 sec, service should not be restarted
        time.time.return_value = 1059
        rewrite_finished(
            '1000 1 0\n',
            '1001 1 0\n',
            '1002 1 0\n',
            '1003 1 0\n',
            '1059 1 0\n',
        )
        self.assertFalse(app_presence.start_service('web_server'))

        # Fifth restart is 105 sec away, service should be restarted
        time.time.return_value = 1105
        rewrite_finished(
            '1000 1 0\n',
            '1101 1 0\n',
            '1102 1 0\n',
            '1103 1 0\n',
            '1104 1 0\n',
        )
        self.assertTrue(app_presence.start_service('web_server'))

        # Last restart in more than 30 sec, should be restarted
        time.time.return_value = 2000
        manifest['services'][0]['restart'] = {'limit': 1, 'interval': 30}
        rewrite_finished(
            '1000 1 0\n',
            '1950 1 0\n',
        )
        self.assertTrue(app_presence.start_service('web_server'))

        # Last restart in less than 30 sec, should be *not* restarted
        rewrite_finished(
            '1000 1 0\n',
            '1001 1 0\n',
            '1980 1 0\n',
        )
        self.assertFalse(app_presence.start_service('web_server'))

        # Confirm that limit: 0 does not allow *any* exit
        manifest['services'][0]['restart'] = {'limit': 0, 'interval': 60}
        time.time.return_value = 2000
        rewrite_finished(
            '1000 1 0\n',
            '1001 1 0\n',
            '1002 1 0\n',
            '1003 1 0\n',
            '1004 1 0\n',
        )
        self.assertFalse(app_presence.start_service('web_server'))
Beispiel #11
0
    def test_update_exit_status(self):
        """Checks that new finished-file records are posted as exit events.

        Every appended record must result in a ServiceExitedTraceEvent with
        the record's rc and signal; a ``reported`` file must exist afterwards
        and a repeated call with nothing new must stay away from the (mocked)
        KazooClient.create.
        """
        vip = {
            'ip0': '192.168.0.1',
            'ip1': '192.168.0.2',
        }
        web_server = {
            'command': '/usr/bin/python -m SimpleHTTPServer',
            'name': 'web_server',
            'restart': {
                'interval': 60,
                'limit': 3,
            },
        }
        sshd = {
            'command': 'sshd -D -f /etc/ssh/sshd_config',
            'name': 'sshd',
            'proid': None,
        }
        endpoints = [
            {'port': 22, 'name': 'ssh', 'real_port': 5001},
            {'port': 8000, 'name': 'http', 'real_port': 5000},
        ]
        manifest = {
            'vip': vip,
            'task': 't-0001',
            'name': 'foo.test1#0001',
            'uniqueid': 'AAAAAA',
            'proid': 'andreik',
            'services': [web_server, sshd],
            'endpoints': endpoints,
        }
        treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
        app_presence = presence.ServicePresence(
            manifest,
            container_dir=self.root,
            appevents_dir=self.events_dir,
        )

        services_dir = os.path.join(self.root, 'services')
        svc_dir = os.path.join(services_dir, 'web_server')
        os.mkdir(services_dir)
        os.mkdir(svc_dir)
        finished_file = os.path.join(svc_dir, 'finished')

        def append_record(record):
            """Add one exit record at the end of the finished file."""
            with open(finished_file, 'a+') as f:
                f.write(record)

        def assert_exit_posted(rc, signal):
            """Check the last posted event is the expected service exit."""
            treadmill.appevents.post.assert_called_with(
                self.events_dir,
                events.ServiceExitedTraceEvent(
                    instanceid='foo.test1#0001',
                    uniqueid='AAAAAA',
                    service='web_server',
                    rc=rc,
                    signal=signal,
                ),
            )

        # First exit record.
        append_record('1000 1 0\n')
        app_presence.update_exit_status('web_server')
        assert_exit_posted(1, 0)

        # Second exit record, appended after the first one.
        kazoo.client.KazooClient.create.reset_mock()
        append_record('2000 9 255\n')
        app_presence.update_exit_status('web_server')
        assert_exit_posted(9, 255)

        reported_file = os.path.join(svc_dir, 'reported')
        self.assertTrue(os.path.exists(reported_file))

        # Calling update state twice is no-op, as reported file is newer.
        kazoo.client.KazooClient.create.reset_mock()
        app_presence.update_exit_status('web_server')
        self.assertFalse(kazoo.client.KazooClient.create.called)
Beispiel #12
0
    def test_app_exit(self):
        """Verifies app deletion on service exit.

        ``exit_app`` is expected to leave an ``exitinfo`` YAML file in the
        container directory. With a ``last_exit`` entry recorded for the
        service the file must include its ``rc`` and ``sig``; without it only
        ``service``, ``killed`` and ``oom`` are expected.
        """
        manifest = {
            'vip': {
                'ip0': '192.168.0.1',
                'ip1': '192.168.0.2'
            },
            'task':
            't-0001',
            'name':
            'foo.test1#0001',
            'uniqueid':
            'AAAAAA',
            'proid':
            'andreik',
            'services': [{
                'command': '/usr/bin/python -m SimpleHTTPServer',
                'name': 'web_server',
                'restart': {
                    'interval': 60,
                    'limit': 3
                }
            }, {
                'command': 'sshd -D -f /etc/ssh/sshd_config',
                'name': 'sshd',
                'proid': None
            }],
            'endpoints': [{
                'port': 22,
                'name': 'ssh',
                'real_port': 5001
            }, {
                'port': 8000,
                'name': 'http',
                'real_port': 5000
            }]
        }
        services_dir = os.path.join(self.root, 'services')
        os.mkdir(services_dir)

        treadmill.sysinfo.hostname.return_value = 'server1.xx.com'
        app_presence = presence.ServicePresence(manifest,
                                                container_dir=self.root,
                                                appevents_dir=self.events_dir)
        app_presence.services['web_server']['last_exit'] = {
            'rc': 1,
            'sig': 3,
        }
        app_presence.exit_app('web_server')

        exitinfo_file = os.path.join(self.root, 'exitinfo')
        self.assertTrue(os.path.exists(exitinfo_file))
        # Read through a context manager so the handle is closed promptly.
        # A bare yaml.load() is deprecated and rejected by newer PyYAML.
        # NOTE(review): safe_load assumes exitinfo holds only plain YAML
        # types, as the expected dictionaries below do -- confirm.
        with open(exitinfo_file) as f:
            exit_info = yaml.safe_load(f.read())
        self.assertEqual(
            exit_info, {
                'rc': 1,
                'sig': 3,
                'service': 'web_server',
                'killed': False,
                'oom': False
            })

        # Without a recorded last exit, rc/sig must be absent from the file.
        del app_presence.services['web_server']['last_exit']
        app_presence.exit_app('web_server')
        self.assertTrue(os.path.exists(exitinfo_file))
        with open(exitinfo_file) as f:
            exit_info = yaml.safe_load(f.read())
        self.assertEqual(
            exit_info, {
                'service': 'web_server',
                'killed': False,
                'oom': False
            })
Beispiel #13
0
    def test_start_service(self):
        """Checks that widely spaced exits never block start_service.

        The service is started once with no history, then four exit records
        are appended one at a time, each with the (mocked) clock set one
        second past it; every start is expected to be allowed.
        """
        restart_policy = {
            'interval': 60,
            'limit': 3,
        }
        vip = {
            'ip0': '192.168.0.1',
            'ip1': '192.168.0.2',
        }
        web_server = {
            'command': '/usr/bin/python -m SimpleHTTPServer',
            'name': 'web_server',
            'restart': dict(restart_policy),
        }
        sshd = {
            'command': 'sshd -D -f /etc/ssh/sshd_config',
            'name': 'sshd',
            'restart': dict(restart_policy),
            'proid': None,
        }
        endpoints = [
            {'port': 22, 'name': 'ssh', 'real_port': 5001},
            {'port': 8000, 'name': 'http', 'real_port': 5000},
        ]
        manifest = {
            'vip': vip,
            'task': 't-0001',
            'name': 'foo.test1',
            'uniqueid': 'AAAAAA',
            'proid': 'andreik',
            'services': [web_server, sshd],
            'endpoints': endpoints,
        }
        app_presence = presence.ServicePresence(
            manifest,
            container_dir=self.root,
            appevents_dir=self.events_dir,
        )

        # Nothing has exited yet, so the very first start must go through.
        self.assertTrue(app_presence.start_service('web_server'))

        services_dir = os.path.join(self.root, 'services')
        svc_dir = os.path.join(services_dir, 'web_server')
        os.mkdir(services_dir)
        os.mkdir(svc_dir)
        finished_file = os.path.join(svc_dir, 'finished')

        # App will be restarted, since it exits outside of its interval.
        exit_history = (
            (1001, '1000 1 0\n'),
            (2001, '2000 1 0\n'),
            (3001, '3000 1 0\n'),
            (4001, '4000 1 0\n'),
        )
        for now, record in exit_history:
            time.time.return_value = now
            with open(finished_file, 'a+') as f:
                f.write(record)
            self.assertTrue(app_presence.start_service('web_server'))