Example #1
0
 def test_aborted(self):
     """Verify AbortedTraceEvent serialization and reconstruction.

     Covers ``to_dict``, ``to_data`` and the ``from_data`` round trip.
     """
     instance = 'proid.foo#123'
     aborted = events.AbortedTraceEvent(
         timestamp=1,
         source='tests',
         instanceid=instance,
         why='reason',
         payload='test',
     )

     # Mapping form: one entry per field plus the event type tag.
     expected_dict = {
         'event_type': 'aborted',
         'timestamp': 1,
         'source': 'tests',
         'instanceid': instance,
         'why': 'reason',
         'payload': 'test',
     }
     self.assertEqual(aborted.to_dict(), expected_dict)

     # Tuple form: the 'why' value travels in the event data slot.
     expected_data = (1, 'tests', instance, 'aborted', 'reason', 'test')
     self.assertEqual(aborted.to_data(), expected_data)

     # Feeding the tuple fields back must yield an equal event.
     rebuilt = events.AbortedTraceEvent.from_data(
         timestamp=1,
         source='tests',
         instanceid=instance,
         event_type='aborted',
         event_data='reason',
         payload='test',
     )
     self.assertEqual(aborted, rebuilt)
Example #2
0
 def _abort_task(self, appname, exception):
     """Post an aborted trace event for an app hit by a scheduling error.

     Nothing is posted when ``self.events_dir`` is not set. The abort
     reason is the class name of ``exception``.
     """
     if not self.events_dir:
         return

     aborted = traceevents.AbortedTraceEvent(
         instanceid=appname,
         why=type(exception).__name__,
     )
     appevents.post(self.events_dir, aborted)
Example #3
0
    def start_container(container_root, manifest):
        """Boot a Treadmill container.

        Reads the app manifest, makes ``container_root`` the root and
        changes directory to ``/``, strips selected variables from the
        environment, then runs ``s6_svscan`` on ``/services`` through
        ``subproc.safe_exec``. If the root switch or the chdir fails, an
        aborted trace event is handed to ``_abort`` and the error is raised
        again.
        """
        _LOGGER.info('Initializing container: %s', container_root)
        manifest_data = app_manifest.read(manifest)

        try:
            pivot_root.make_root(container_root)
            os.chdir('/')
        except Exception as exc:  # pylint: disable=broad-except
            # Record the failure as an aborted event before bailing out.
            aborted = traceevents.AbortedTraceEvent(
                instanceid=manifest_data['name'],
                why=app_abort.AbortedReason.PIVOT_ROOT.value,
                payload=str(exc),
            )
            _abort(aborted, container_root)

            # Propagate the error so that start_container exits.
            raise exc

        # XXX: Debug info
        mounts = fs_linux.list_mounts()
        _LOGGER.debug('Current mounts: %s', pprint.pformat(mounts))

        # Clean the environ
        # TODO: Remove me once clean environment management is merged in.
        for stale_var in ('PYTHONPATH', 'LC_ALL', 'LANG'):
            os.environ.pop(stale_var, None)

        # Clear aliases path.
        os.environ.pop('TREADMILL_ALIASES_PATH', None)

        subproc.safe_exec(['s6_svscan', '-s', '/services'])
Example #4
0
 def _abort_task(self, appname, exception):
     """Post an aborted trace event for an app hit by a scheduling error.

     Nothing is posted when ``self.events_dir`` is not set. The event uses
     ``app_abort.SCHEDULER`` as its reason and carries ``exception`` as the
     payload.
     """
     if not self.events_dir:
         return

     aborted = traceevents.AbortedTraceEvent(
         instanceid=appname,
         why=app_abort.SCHEDULER,
         payload=exception,
     )
     appevents.post(self.events_dir, aborted)
Example #5
0
    def test_post_zk(self):
        """Check the Zookeeper nodes created by appevents.post_zk.

        Pending and pending-delete events are each expected to create a
        single trace node; an aborted event is expected to create a trace
        node followed by a finished node.
        """
        zk_mock = mock.Mock()
        zk_mock.get_children.return_value = []
        instance = 'foo.bar#123'
        # Keyword arguments expected on every node creation.
        create_kwargs = {
            'ephemeral': False,
            'makepath': True,
            'sequence': False,
            'acl': mock.ANY,
        }

        # Pending event.
        pending = events.PendingTraceEvent(
            instanceid=instance,
            why='created',
            payload='',
        )
        appevents.post_zk(zk_mock, pending)
        zk_mock.create.assert_called_once_with(
            '/trace/007B/foo.bar#123,100,baz,pending,created',
            b'',
            **create_kwargs
        )
        zk_mock.reset_mock()

        # Pending-delete event.
        pending_delete = events.PendingDeleteTraceEvent(
            instanceid=instance,
            why='deleted',
        )
        appevents.post_zk(zk_mock, pending_delete)
        zk_mock.create.assert_called_once_with(
            '/trace/007B/foo.bar#123,100,baz,pending_delete,deleted',
            b'',
            **create_kwargs
        )
        zk_mock.reset_mock()

        # Aborted event: trace node first, then the finished node.
        aborted = events.AbortedTraceEvent(
            instanceid=instance,
            why='test',
        )
        appevents.post_zk(zk_mock, aborted)
        expected_calls = [
            mock.call(
                '/trace/007B/foo.bar#123,100,baz,aborted,test',
                b'',
                **create_kwargs
            ),
            mock.call(
                '/finished/foo.bar#123',
                b'{data: test, host: baz, state: aborted, when: \'100\'}\n',
                **create_kwargs
            ),
        ]
        zk_mock.create.assert_has_calls(expected_calls)
Example #6
0
 def test_post(self):
     """Check that appevents.post creates the expected event file."""
     aborted = events.AbortedTraceEvent(
         why='container_error',
         instanceid='foo.bar#123',
         payload=None,
     )
     appevents.post(self.root, aborted)

     # The file name is expected to encode the event fields.
     event_file = os.path.join(
         self.root,
         '100,foo.bar#123,aborted,container_error',
     )
     self.assertTrue(os.path.exists(event_file))
Example #7
0
def report_aborted(tm_env, instance, why=None, payload=None):
    """Post an aborted trace event for ``instance``.

    Called when aborting after failed configure step or from cleanup.

    ``why`` is converted with ``_why_str``; a payload other than ``None``
    is converted to its string form before being attached to the event.
    """
    payload_text = None if payload is None else str(payload)

    aborted = events.AbortedTraceEvent(
        instanceid=instance,
        why=_why_str(why),
        payload=payload_text,
    )
    appevents.post(tm_env.app_events_dir, aborted)
Example #8
0
def finish(tm_env, zkclient, container_dir, watchdog):
    """Tear down a container and release the resources it was holding.

    Stops the container, collects its exit/abort information, runs the
    cleanup, posts the resulting trace events, deletes the container
    directory and finally removes ``watchdog``.
    """
    container_name = os.path.basename(container_dir)
    with lc.LogContext(_LOGGER, container_name,
                       lc.ContainerAdapter) as log:
        log.info('finishing %r', container_dir)

        _stop_container(container_dir)

        # Gather the exit information (including whether the application
        # reached its restart limit inside the container) before cleanup:
        # the container directory will be moved.
        #
        # If the restart limit was reached, the application node will be
        # removed from Zookeeper at the end of the cleanup process, telling
        # the scheduler that the server is ready to accept new load.
        exit_info, was_aborted, abort_payload = _collect_exit_info(
            container_dir
        )

        app = runtime.load_app(container_dir)
        if app:
            _cleanup(tm_env, zkclient, container_dir, app)
        else:
            # Fall back to the app JSON file when the default load fails.
            app = runtime.load_app(container_dir, appcfg.APP_JSON)

        if app:
            # All resources are cleaned up. If the app terminated inside the
            # container, remove the node from Zookeeper, which will notify
            # the scheduler that it is safe to reuse the host for other load.
            if was_aborted:
                aborted_event = events.AbortedTraceEvent(
                    instanceid=app.name,
                    why=None,  # TODO(boysson): extract this info
                    payload=abort_payload,
                )
                appevents.post(tm_env.app_events_dir, aborted_event)

            if exit_info:
                _post_exit_event(tm_env, app, exit_info)

            # cleanup monitor with container information
            apphook.cleanup(tm_env, app)

        # Delete the app directory (this includes the tarball, if any)
        shutil.rmtree(container_dir)

        # Cleanup was successful, remove the watchdog.
        watchdog.remove()
        log.logger.info('Finished cleanup: %s', container_dir)
Example #9
0
    def test_aborted(self, stdout_mock):
        """Check the line printed for an Aborted event.
        """
        aborted = events.AbortedTraceEvent(
            timestamp=1,
            source='tests',
            instanceid='proid.foo#123',
            why='unknown',
            payload='test',
        )

        self.trace_printer.process(aborted)

        # One line: formatted timestamp, instance, source and reason.
        expected_line = (
            'Thu, 01 Jan 1970 00:00:01+0000 - '
            'proid.foo#123 aborted on tests [reason: unknown]\n'
        )
        self.assertEqual(stdout_mock.getvalue(), expected_line)
Example #10
0
def abort(tm_env, event, exc=None, reason=None):
    """Abort an unconfigured application.

    Called when aborting after failed configure step.

    The instance id is the base name of the ``event`` path. When no
    ``reason`` is given but ``exc`` is, the exception's class name is used
    as the reason.
    """
    # After a failed configure step, the instance name is derived from the
    # event file name.
    app_instance = os.path.basename(event)
    _LOGGER.info('Aborting %s', app_instance)

    # Report start failure.
    why = type(exc).__name__ if (reason is None and exc) else reason

    aborted = events.AbortedTraceEvent(
        why=why,
        instanceid=app_instance,
        payload=None,
    )
    appevents.post(tm_env.app_events_dir, aborted)
Example #11
0
    def test_report_aborted(self):
        """Tests report abort sequence.

        Calls ``app_abort.report_aborted`` with the ``TICKETS`` reason and
        checks that ``appevents.post`` receives an aborted event whose
        ``why`` is ``'tickets'`` and whose payload is passed through.
        """
        context.GLOBAL.zk.url = 'zookeeper://xxx@hhh:123/treadmill/mycell'
        treadmill.zkutils.connect.return_value = kazoo.client.KazooClient()
        kazoo.client.KazooClient.get_children.return_value = []
        kazoo.client.KazooClient.exists.return_value = True

        # Clear any previously recorded calls. reset_mock() is the Mock API
        # for this; a plain ``reset()`` on a mock is only an auto-created
        # child attribute and clears nothing. (Assumes these attributes are
        # patched with mocks by the enclosing test setup.)
        kazoo.client.KazooClient.create.reset_mock()
        kazoo.client.KazooClient.delete.reset_mock()

        app_abort.report_aborted(self.tm_env, 'proid.myapp#001',
                                 why=app_abort.AbortedReason.TICKETS,
                                 payload='test')
        treadmill.appevents.post.assert_called_with(
            mock.ANY,
            events.AbortedTraceEvent(
                instanceid='proid.myapp#001',
                why='tickets',
                payload='test',
            ),
        )
Example #12
0
    def test_abort(self):
        """Tests abort sequence.

        Calls ``app_abort.abort`` with only an exception and checks that
        ``appevents.post`` receives an aborted event whose ``why`` is the
        exception class name and whose instance id is the manifest file's
        base name.
        """
        context.GLOBAL.zk.url = 'zookeeper://xxx@hhh:123/treadmill/mycell'
        treadmill.zkutils.connect.return_value = kazoo.client.KazooClient()
        kazoo.client.KazooClient.get_children.return_value = []
        kazoo.client.KazooClient.exists.return_value = True

        # Check abort sequence when name is not part of the manifest, rather
        # derived from the manifest appname.
        manifest_file = os.path.join(self.root, 'schema', 'proid.myapp#001')

        # Clear any previously recorded calls. reset_mock() is the Mock API
        # for this; a plain ``reset()`` on a mock is only an auto-created
        # child attribute and clears nothing. (Assumes these attributes are
        # patched with mocks by the enclosing test setup.)
        kazoo.client.KazooClient.create.reset_mock()
        kazoo.client.KazooClient.delete.reset_mock()

        app_abort.abort(self.tm_env, manifest_file, exc=Exception('test'))
        treadmill.appevents.post.assert_called_with(
            mock.ANY,
            events.AbortedTraceEvent(
                instanceid='proid.myapp#001',
                why='Exception',
                payload=None,
            ),
        )
Example #13
0
    def test_finish_aborted(self):
        """Tests container finish procedure when node is aborted.

        An ``aborted`` marker file is written into the app directory before
        ``app_finish.finish`` runs; the test then checks the posted aborted
        event and the metrics flush/copy calls.
        """
        manifest = {
            'app': 'proid.myapp',
            'cell': 'test',
            'cpu': '100%',
            'disk': '100G',
            'environment': 'dev',
            'host_ip': '172.31.81.67',
            'memory': '100M',
            'name': 'proid.myapp#001',
            'proid': 'foo',
            'shared_network': False,
            'task': '001',
            'uniqueid': '0000000ID1234',
            'archive': ['/var/tmp/treadmill'],
            'endpoints': [{
                'port': 8000,
                'name': 'http',
                'real_port': 5000,
                'proto': 'tcp',
            }],
            'services': [{
                'name': 'web_server',
                'command': '/bin/false',
                'restart': {
                    'limit': 3,
                    'interval': 60,
                },
            }],
            'ephemeral_ports': {
                'tcp': [],
                'udp': [],
            },
        }
        treadmill.appmgr.manifest.read.return_value = manifest
        app_unique_name = 'proid.myapp-001-0000000ID1234'
        mock_ld_client = self.app_env.svc_localdisk.make_client.return_value
        localdisk = {
            'block_dev': '/dev/foo',
        }
        mock_ld_client.get.return_value = localdisk
        mock_nwrk_client = self.app_env.svc_network.make_client.return_value
        network = {
            'vip': '192.168.0.2',
            'gateway': '192.168.254.254',
            'veth': 'testveth.0',
        }
        mock_nwrk_client.get.return_value = network
        app_dir = os.path.join(self.root, 'apps', app_unique_name)
        # Create content in app root directory, verify that it is archived.
        fs.mkdir_safe(os.path.join(app_dir, 'root', 'xxx'))
        fs.mkdir_safe(os.path.join(app_dir, 'services'))
        # Simulate daemontools finish script, marking the app is done.
        with open(os.path.join(app_dir, 'aborted'), 'w') as aborted:
            aborted.write('something went wrong')
        mock_zkclient = kazoo.client.KazooClient()

        app_finish.finish(self.app_env, mock_zkclient, app_dir)

        # Assert on the recorded calls: invoking ``post(...)`` directly on
        # the mock would only record one more call and verify nothing.
        # assert_any_call is used so that any other events posted by
        # finish() do not affect this check.
        # NOTE(review): confirm the expected payload matches what finish()
        # derives from the 'aborted' file; it cannot be checked from here.
        treadmill.appevents.post.assert_any_call(
            mock.ANY,
            events.AbortedTraceEvent(instanceid='proid.myapp#001',
                                     why=None,
                                     payload={
                                         'why': 'something went wrong',
                                         'node': 'hostname',
                                     }))
        metrics_rrd = os.path.join(self.root, 'metrics', 'apps',
                                   app_unique_name + '.rrd')
        treadmill.rrdutils.flush_noexc.assert_called_with(metrics_rrd)
        shutil.copy.assert_called_with(
            metrics_rrd,
            os.path.join(app_dir, 'metrics.rrd'))
Example #14
0
def finish(tm_env, zkclient, container_dir):
    """Tear down a container and release the resources it was holding.

    A watchdog is created for the duration of the cleanup and removed once
    the container directory has been deleted.

    :param tm_env:
        Treadmill application environment
    :type tm_env:
        `appmgr.AppEnvironment`
    :param container_dir:
        Full path to the application container directory
    :type container_dir:
        ``str``
    """

    # FIXME(boysson): Clean should be done inside the container. The watchdog
    #                 value below is inflated to account for the extra
    #                 archiving time.
    container_name = os.path.basename(container_dir)
    with lc.LogContext(_LOGGER, container_name, lc.ContainerAdapter) as log:
        log.info('finishing %r', container_dir)

        stall_message = 'Cleanup of %r stalled' % container_dir
        watchdog = tm_env.watchdogs.create(
            '{name}-{app}'.format(name=__name__, app=container_name),
            '5m',
            stall_message,
        )

        _stop_container(container_dir)

        # Gather the exit information (including whether the application
        # reached its restart limit inside the container) before cleanup:
        # the container directory will be moved.
        #
        # If the restart limit was reached, the application node will be
        # removed from Zookeeper at the end of the cleanup process, telling
        # the scheduler that the server is ready to accept new load.
        exit_info, was_aborted, abort_payload = _collect_exit_info(
            container_dir
        )

        app = _load_app(container_dir, _STATE_YML)
        if app:
            _cleanup(tm_env, zkclient, container_dir, app)
        else:
            # Fall back to the app file when the state file does not load.
            app = _load_app(container_dir, _APP_YML)

        if app:
            # All resources are cleaned up. If the app terminated inside the
            # container, remove the node from Zookeeper, which will notify
            # the scheduler that it is safe to reuse the host for other load.
            if was_aborted:
                aborted_event = events.AbortedTraceEvent(
                    instanceid=app.name,
                    why=None,  # TODO(boysson): extract this info
                    payload=abort_payload,
                )
                appevents.post(tm_env.app_events_dir, aborted_event)

            if exit_info:
                _post_exit_event(tm_env, app, exit_info)

        # Delete the app directory (this includes the tarball, if any)
        shutil.rmtree(container_dir)

        # Cleanup was successful, remove the watchdog.
        watchdog.remove()
        log.info('Finished cleanup: %s', container_dir)
Example #15
0
    def test_post(self):
        """Check event files written by appevents.post and their upload.

        Each posted event must produce a file under ``self.root``; feeding
        that file to the watcher must create the matching Zookeeper nodes.
        """
        # Disable W0212(protected-access)
        # pylint: disable=W0212
        zk_mock = mock.Mock()
        zk_mock.get_children.return_value = []
        watcher = appevents.AppEventsWatcher(zk_mock, self.root)
        instance = 'foo.bar#123'
        # Keyword arguments expected on every node creation.
        create_kwargs = {
            'ephemeral': False,
            'makepath': True,
            'sequence': False,
            'acl': mock.ANY,
        }

        # Pending event.
        pending = events.PendingTraceEvent(
            instanceid=instance,
            why='created',
        )
        appevents.post(self.root, pending)
        event_file = os.path.join(self.root,
                                  '100,foo.bar#123,pending,created')
        self.assertTrue(os.path.exists(event_file))
        watcher._on_created(event_file)
        zk_mock.create.assert_called_once_with(
            '/trace/007B/foo.bar#123,100,baz,pending,created',
            b'',
            **create_kwargs
        )

        # Pending-delete event.
        zk_mock.reset_mock()
        pending_delete = events.PendingDeleteTraceEvent(
            instanceid=instance,
            why='deleted',
        )
        appevents.post(self.root, pending_delete)
        event_file = os.path.join(self.root,
                                  '100,foo.bar#123,pending_delete,deleted')
        self.assertTrue(os.path.exists(event_file))
        watcher._on_created(event_file)
        zk_mock.create.assert_called_once_with(
            '/trace/007B/foo.bar#123,100,baz,pending_delete,deleted',
            b'',
            **create_kwargs
        )

        # Aborted event: trace node first, then the finished node.
        zk_mock.reset_mock()
        aborted = events.AbortedTraceEvent(instanceid=instance, why='test')
        appevents.post(self.root, aborted)
        event_file = os.path.join(self.root, '100,foo.bar#123,aborted,test')
        self.assertTrue(os.path.exists(event_file))
        watcher._on_created(event_file)
        expected_calls = [
            mock.call(
                '/trace/007B/foo.bar#123,100,baz,aborted,test',
                b'',
                **create_kwargs
            ),
            mock.call(
                '/finished/foo.bar#123',
                b'{data: test, host: baz, state: aborted, when: \'100\'}\n',
                **create_kwargs
            ),
        ]
        self.assertEqual(zk_mock.create.call_args_list, expected_calls)