예제 #1
0
def finish(tm_env, container_dir):
    """Release a finished container's resources and report how it ended.

    The app manifest is loaded from ``<container_dir>/data``. When the
    default manifest loads, ``_cleanup`` is run for it; otherwise the
    manifest is re-read using ``appcfg.APP_JSON`` and ``_cleanup`` is not
    run. If no manifest can be loaded at all, nothing further happens.

    With a manifest in hand, at most one outcome is reported, checked in
    this order: aborted, OOM, terminated, service exit. Finally the app hook
    cleanup is invoked.

    :param tm_env:
        Environment object handed through to the cleanup/reporting helpers.
    :param ``str`` container_dir:
        Path of the container directory being finished.
    """
    container_name = os.path.basename(container_dir)
    with lc.LogContext(_LOGGER, container_name, lc.ContainerAdapter):
        _LOGGER.info('finishing %r', container_dir)

        data_dir = os.path.join(container_dir, 'data')

        app = runtime.load_app(data_dir)
        if app is None:
            # Default manifest missing: fall back to the APP_JSON manifest.
            # ``_cleanup`` is not run on this path.
            app = runtime.load_app(data_dir, appcfg.APP_JSON)
            if app is None:
                # Nothing could be loaded, so there is nothing to report.
                return
        else:
            _cleanup(tm_env, data_dir, app)

        # Exit information is gathered once, before any event is reported.
        exitinfo, aborted, oom, terminated = _collect_exit_info(data_dir)

        if aborted is not None:
            app_abort.report_aborted(tm_env,
                                     app.name,
                                     why=aborted.get('why'),
                                     payload=aborted.get('payload'))
        elif oom:
            _post_oom_event(tm_env, app)
        elif terminated:
            # Terminated (or evicted): no event is posted here, this is
            # handled by the master.
            _LOGGER.info('Terminated: %s', app.name)
        elif exitinfo is not None:
            # The service exited on its own. This must stay the last case
            # so the more specific outcomes above take precedence.
            _post_exit_event(tm_env, app, exitinfo)

        # Cleanup monitor with container information.
        if app:
            apphook.cleanup(tm_env, app, container_dir)
예제 #2
0
파일: runtime.py 프로젝트: linbai/treadmill
    def _finish(self):
        """Remove the docker container and record how the app ended.

        Loads the app from the service data directory using
        ``runtime.STATE_JSON`` and returns immediately when nothing is
        loaded. Otherwise the container (if it still exists) is force
        removed, one trace outcome is reported (abort, OOM kill or normal
        finish), Windows credential specs are cleaned up, and local logs are
        archived on a best-effort basis.
        """
        data_dir = self._service.data_dir
        app = runtime.load_app(data_dir, runtime.STATE_JSON)
        if not app:
            return

        client = self._get_client()
        container = None
        final_state = None
        unique_name = appcfg.app_unique_name(app)
        try:
            container = client.containers.get(unique_name)
            final_state = container.attrs.get('State')
        except docker.errors.NotFound:
            # No such container: there is nothing to remove or inspect.
            pass

        if container is not None:
            try:
                container.remove(force=True)
            except docker.errors.APIError:
                _LOGGER.error('Failed to remove %s', container.id)

        abort_info = _check_aborted(data_dir)
        if abort_info is not None:
            # An abort takes precedence over whatever state docker reported.
            app_abort.report_aborted(
                self._tm_env,
                app.name,
                why=abort_info.get('why'),
                payload=abort_info.get('payload'),
            )
        elif final_state is not None:
            if final_state.get('OOMKilled', False):
                trace_event = events.KilledTraceEvent(
                    instanceid=app.name,
                    is_oom=True,
                )
            else:
                trace_event = events.FinishedTraceEvent(
                    instanceid=app.name,
                    # 256 is used when docker reported no exit code.
                    rc=final_state.get('ExitCode', 256),
                    signal=0,
                    payload=final_state,
                )

            trace.post(self._tm_env.app_events_dir, trace_event)

        if os.name == 'nt':
            credential_spec.cleanup(unique_name, client)

        try:
            runtime.archive_logs(self._tm_env, unique_name, data_dir)
        except Exception:  # pylint: disable=W0703
            # Archiving is best effort; log the failure and carry on.
            _LOGGER.exception('Unexpected exception storing local logs.')
예제 #3
0
    def _configure(self, instance_name):
        """Configure an instance from its cached event and link it as running.

        Calls ``app_cfg.configure`` with the cached event file of the
        instance. On success a symlink named after the instance is created
        in the running directory, pointing at the new container directory.
        On any failure the cached event file is removed; when the failure is
        an exception, an abort is also reported with the traceback as
        payload.

        :param ``str`` instance_name:
            Name of the instance to configure
        :returns ``bool``:
            True for successfully configured container, False otherwise.
        """
        event_file = os.path.join(self.tm_env.cache_dir, instance_name)

        with lc.LogContext(_LOGGER, instance_name):
            try:
                _LOGGER.info('Configuring')
                container_dir = app_cfg.configure(
                    self.tm_env, event_file, self._runtime
                )
                if container_dir is not None:
                    running_link = os.path.join(self.tm_env.running_dir,
                                                instance_name)
                    # symlink_safe(link, target)
                    fs.symlink_safe(running_link, container_dir)
                    return True

                # configure step failed, skip.
                fs.rm_safe(event_file)
                return False

            except exc.ContainerSetupError as err:
                # Setup errors carry their own abort reason.
                _LOGGER.exception('Error configuring (%r)', instance_name)
                app_abort.report_aborted(
                    self.tm_env,
                    instance_name,
                    why=err.reason,
                    payload=traceback.format_exc(),
                )
                fs.rm_safe(event_file)
                return False
            except Exception:  # pylint: disable=W0703
                # Anything else is reported with the UNKNOWN reason.
                _LOGGER.exception('Error configuring (%r)', instance_name)
                app_abort.report_aborted(
                    self.tm_env,
                    instance_name,
                    why=app_abort.AbortedReason.UNKNOWN,
                    payload=traceback.format_exc(),
                )
                fs.rm_safe(event_file)
                return False
예제 #4
0
    def test_report_aborted(self):
        """Tests report abort sequence.

        Reports an abort for ``proid.myapp#001`` with the TICKETS reason and
        checks that the last event posted is an ``AbortedTraceEvent`` with
        ``why='tickets'`` and the same payload.
        """
        context.GLOBAL.zk.url = 'zookeeper://xxx@hhh:123/treadmill/mycell'
        treadmill.zkutils.connect.return_value = kazoo.client.KazooClient()
        kazoo.client.KazooClient.get_children.return_value = []
        kazoo.client.KazooClient.exists.return_value = True

        # NOTE(review): assumes ``create``/``delete`` are patched with Mock
        # objects by decorators outside this view. ``reset_mock()`` is the
        # Mock API that clears recorded calls; a plain ``.reset()`` on a Mock
        # only auto-creates and calls a child attribute, resetting nothing.
        kazoo.client.KazooClient.create.reset_mock()
        kazoo.client.KazooClient.delete.reset_mock()

        app_abort.report_aborted(self.tm_env, 'proid.myapp#001',
                                 why=app_abort.AbortedReason.TICKETS,
                                 payload='test')
        treadmill.appevents.post.assert_called_with(
            mock.ANY,
            events.AbortedTraceEvent(
                instanceid='proid.myapp#001',
                why='tickets',
                payload='test',
            ),
        )
예제 #5
0
def _post_aborted_event(tm_env, appname, aborted):
    """Post aborted event.

    Forwards the ``why`` and ``payload`` entries of ``aborted`` (looked up
    with ``.get``, so missing entries become ``None``) to
    ``app_abort.report_aborted`` for ``appname``.
    """
    reason = aborted.get('why')
    details = aborted.get('payload')
    app_abort.report_aborted(tm_env, appname, why=reason, payload=details)