コード例 #1
0
 def test_nested_adapters(self):
     """Check that a log context accepts another context's adapter.

     The adapter yielded by the outer context is handed to the inner
     context as its logger, and one message is emitted at each level.
     """
     base_logger = logging.getLogger(__name__)
     with lc.LogContext(base_logger, 'proid.app#123') as outer_log:
         outer_log.info('foo')
         with lc.LogContext(outer_log, 'some.ting#123') as inner_log:
             inner_log.info('bar')
コード例 #2
0
    def on_delete_request(self, rsrc_id):
        """Delete the cgroups that belong to a resource.

        :param rsrc_id:
            Resource identifier; it is also the leaf name of the cgroup under
            the apps group.
        :returns ``bool``:
            Always ``True``.
        """
        cgrp = os.path.join(
            cgutils.apps_group_name(self._cgroup_prefix),
            rsrc_id
        )

        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            # The OOM handler is dropped first, then the cgroup is removed
            # from every subsystem.
            self._unregister_oom_handler(cgrp)

            log.info('Deleting cgroups: %s:%s', self.SUBSYSTEMS, cgrp)
            for subsys in self.SUBSYSTEMS:
                cgutils.delete(subsys, cgrp)

        # TODO: recalculating the cgroup hard limits of the remaining apps is
        #       disabled until a proper fix is implemented. The disabled
        #       logic called cgutils.reset_memory_limit_in_bytes(), wrote
        #       'oom' into <app dir>/services/finished for every expunged app
        #       and killed the apps of its memory cgroup
        #       (cgutils.kill_apps_in_cgroup(..., delete_cgrp=False)).

        return True
コード例 #3
0
 def _refresh_supervisor(self, instance_names=()):
     """Ask the supervisor to rescan, then bring each given instance up.

     :param instance_names:
         Iterable of instance names (entries of the running directory) to
         start after the rescan has been requested.
     """
     subproc.check_call(['s6_svscanctl', '-an', self.tm_env.running_dir])
     for instance_name in instance_names:
         with lc.LogContext(_LOGGER, instance_name):
             _LOGGER.info('Starting')
             run_link = os.path.join(self.tm_env.running_dir,
                                     instance_name)
             # Poll (at most 10 times, 0.5s apart) until the supervisor
             # reports the new instance.
             for _attempt in range(10):
                 if subproc.call(['s6_svok', run_link]) == 0:
                     break
                 _LOGGER.warning('Supervisor has not picked it up yet')
                 time.sleep(0.5)
             # Bring the instance up. This is attempted even when the
             # polling above ran out of attempts.
             subproc.check_call(['s6_svc', '-uO', run_link])
コード例 #4
0
    def run(approot, runtime, container_dir):
        """Runs container given a container dir."""
        container_name = os.path.basename(container_dir)
        with lc.LogContext(_LOGGER, container_name,
                           lc.ContainerAdapter) as log:
            # Intercept SIGTERM from s6 supervisor, so that initialization is
            # not left in broken state.
            terminated = utils.make_signal_flag(utils.term_signal())
            # Stays None if building the environment itself fails; the abort
            # handler below receives it either way.
            tm_env = None
            try:
                log.info('run %r %r', approot, container_dir)
                tm_env = appenv.AppEnvironment(approot)

                container_runtime = app_runtime.get_runtime(
                    runtime, tm_env, container_dir
                )
                container_runtime.run(terminated)

                # Reaching this point means the application was terminated.

            except Exception as exc:  # pylint: disable=W0703
                if terminated:
                    # The failure happened while handling termination.
                    log.logger.info('Exception while handling term, ignore.',
                                    exc_info=True)
                else:
                    log.critical('Failed to start, app will be aborted.',
                                 exc_info=True)
                    app_abort.flag_aborted(tm_env, container_dir, exc)
コード例 #5
0
ファイル: local.py プロジェクト: GrammaB/treadmill
            def _get_all(log_id):
                """Return a file-like object with all the log entries including
                the rotated ones.

                :param ``str`` log_id:
                    Log identifier of the form
                    ``<instance>/<uniq>/<logtype>/<component>``.
                :returns:
                    The result of ``_concat_files`` applied to the sorted log
                    files, taken from the log directory when it exists and
                    from the archive otherwise.
                :raises exc.LocalFileNotFoundError:
                    If the log directory does not exist and ``uniq`` is
                    ``'running'``.
                :raises ValueError:
                    If ``log_id`` does not split into exactly four parts.
                """
                instance, uniq, logtype, component = log_id.split('/')

                with lc.LogContext(_LOGGER, '{}/{}'.format(instance, uniq)):
                    rel_log_dir = _rel_log_dir_path(logtype, component)
                    abs_log_dir = _abs_log_dir_path(tm_env, instance, uniq,
                                                    rel_log_dir)

                    # Lazy %-style arguments: the message is only built when
                    # the record is actually emitted.
                    _LOGGER.info('Check logs in %s', abs_log_dir)
                    if os.path.exists(abs_log_dir):
                        logs = glob.glob(os.path.join(abs_log_dir, '@*.s'))
                        logs.append(os.path.join(abs_log_dir, 'current'))

                        # alphanumerical sort results in chronological order
                        # as per
                        # https://skarnet.org/software/skalibs/libstddjb/tai.html
                        return _concat_files(sorted(logs))

                    # No log directory: a running instance has no archive to
                    # fall back to.
                    if uniq == 'running':
                        raise exc.LocalFileNotFoundError(
                            'No log could be found for {}.'.format(log_id))

                    archive = _archive_path(tm_env, logtype, instance, uniq)
                    log_filter = functools.partial(_arch_log_filter,
                                                   rel_log_dir=rel_log_dir)
                    logs = _extract_archive(archive,
                                            extract_filter=log_filter)

                    # alphanumerical sort results in chronological order
                    # as per
                    # https://skarnet.org/software/skalibs/libstddjb/tai.html
                    return _concat_files(sorted(logs))
コード例 #6
0
    def _configure(self, instance_name):
        """Configure and schedule an instance from its cached event file.

        - Runs app_configure --approot <rootdir> cache/<instance>

        On any error the instance is aborted and its cached event file is
        removed.

        :param ``str`` instance_name:
            Name of the instance to configure
        :returns ``bool``:
            True for successfully configured container.
        """
        event_file = os.path.join(self.tm_env.cache_dir, instance_name)

        with lc.LogContext(_LOGGER, instance_name):
            try:
                _LOGGER.info('Configuring')
                container_dir = app_cfg.configure(self.tm_env, event_file)
                running_link = os.path.join(self.tm_env.running_dir,
                                            instance_name)
                app_cfg.schedule(container_dir, running_link)

            except Exception as err:  # pylint: disable=W0703
                _LOGGER.exception('Error configuring (%r)', event_file)
                app_abort.abort(self.tm_env, event_file, err)
                fs.rm_safe(event_file)
                return False

            else:
                return True
コード例 #7
0
        def _on_created(path):
            """Callback invoked when a new cleanup file appears.

            Entries that are not symlinks are ignored. Otherwise the link
            target is finished through the treadmill binary (when the target
            still exists) and the link is removed afterwards.

            :param path:
                Name of the new entry, relative to the cleanup directory.
            :raises subprocess.CalledProcessError:
                If the finish command fails; the link is then left in place.
            """
            fullpath = os.path.join(tm_env.cleanup_dir, path)
            with lc.LogContext(_LOGGER, os.path.basename(path),
                               lc.ContainerAdapter) as log:
                raw_log = log.logger

                if not os.path.islink(fullpath):
                    raw_log.info('Ignore - not a link: %s', fullpath)
                    return

                container_dir = os.readlink(fullpath)
                raw_log.info('Cleanup: %s => %s', path, container_dir)

                if not os.path.exists(container_dir):
                    raw_log.info('Container dir does not exist: %r',
                                 container_dir)
                else:
                    try:
                        raw_log.info(
                            'invoking treadmill.TREADMILL_BIN script: %r',
                            treadmill.TREADMILL_BIN)
                        finish_cmd = [
                            treadmill.TREADMILL_BIN, 'sproc', 'finish',
                            container_dir
                        ]
                        subprocess.check_call(finish_cmd)
                    except subprocess.CalledProcessError:
                        raw_log.exception('Fatal error running %r.',
                                          treadmill.TREADMILL_BIN)
                        raise

                os.unlink(fullpath)
コード例 #8
0
    def invoke(self, runtime, instance, runtime_param=None):
        """Run the runtime's finish step for an instance's container.

        The cleanup link of the instance is removed afterwards, unless the
        finish step fails while the container directory still exists.

        :param runtime:
            Runtime name handed to ``app_runtime.get_runtime``.
        :param instance:
            Name of the entry in the cleanup directory.
        :param runtime_param:
            Optional extra parameter forwarded to ``get_runtime``.
        """
        cleanup_link = os.path.join(self.tm_env.cleanup_dir, instance)
        container_dir = os.readlink(cleanup_link)
        _LOGGER.info('Cleanup: %s => %s', instance, container_dir)

        if not os.path.exists(container_dir):
            _LOGGER.info('Container dir does not exist: %r', container_dir)
        else:
            with lc.LogContext(_LOGGER, os.path.basename(container_dir),
                               lc.ContainerAdapter) as log:
                try:
                    container_runtime = app_runtime.get_runtime(
                        runtime, self.tm_env, container_dir, runtime_param
                    )
                    container_runtime.finish()
                except Exception:  # pylint: disable=W0703
                    # A failure is only fatal if the directory is still
                    # there once the finish step has failed.
                    if os.path.exists(container_dir):
                        log.exception('Fatal error running finish %r.',
                                      container_dir)
                        raise
                    log.info('Container dir does not exist: %s',
                             container_dir)

        fs.rm_safe(cleanup_link)
コード例 #9
0
    def on_delete_request(self, rsrc_id):
        """Delete the veth device of a resource and release its IP.

        :param rsrc_id:
            Resource identifier (the application unique name).
        :returns ``bool``:
            Always ``True``.
        :raises OSError, IOError:
            If querying or deleting the device fails with anything other
            than ``ENOENT``.
        """
        with lc.LogContext(_LOGGER, rsrc_id):
            veth, _ = _device_from_rsrc_id(rsrc_id)

            try:
                netdev.dev_state(veth)
                netdev.link_del_veth(veth)

            except (OSError, IOError) as err:
                # ENOENT is tolerated; anything else is re-raised.
                if err.errno != errno.ENOENT:
                    raise

            # Remove it from our state (if present)
            dev_info = self._devices.pop(rsrc_id, None)
            if dev_info is not None and 'ip' in dev_info:
                device_ip = dev_info['ip']
                # Remove the environment mark on the IP
                if 'environment' in dev_info:
                    _delete_mark_rule(device_ip, dev_info['environment'])
                # VIPs deallocation (the owner is the resource link)
                self._vips.free(rsrc_id, device_ip)

        return True
コード例 #10
0
    def finish(approot, runtime, container_dir):
        """Finish treadmill application on the node."""
        # The container directory's basename is used as the log context.
        container_name = os.path.basename(container_dir)
        with lc.LogContext(_LOGGER, container_name,
                           lc.ContainerAdapter) as log:
            log.info('finish (approot %s)', approot)
            tm_env = appenv.AppEnvironment(approot)

            container_runtime = app_runtime.get_runtime(
                runtime, tm_env, container_dir
            )
            container_runtime.finish()
コード例 #11
0
    def on_create_request(self, rsrc_id, rsrc_data):
        """Create and configure the cgroups of a resource.

        :param rsrc_id:
            Resource identifier; it is also the leaf name of the cgroup
            ``treadmill/apps/<rsrc_id>``.
        :param ``dict`` rsrc_data:
            Must provide the ``memory`` and ``cpu`` limits.
        :returns ``dict``:
            Every subsystem of ``self.SUBSYSTEMS`` mapped to the cgroup path.
        """
        memory_limit = rsrc_data['memory']
        cpu_limit = rsrc_data['cpu']

        cgrp = os.path.join('treadmill', 'apps', rsrc_id)

        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            log.info('Creating cgroups: %s:%s', self.SUBSYSTEMS, cgrp)
            for subsys in self.SUBSYSTEMS:
                cgutils.create(subsys, cgrp)

            # blkio settings
            cgroups.set_value('blkio', cgrp, 'blkio.weight', 100)

            # memory settings
            self._register_oom_handler(cgrp, rsrc_id)
            cgroups.set_value('memory', cgrp, 'memory.soft_limit_in_bytes',
                              memory_limit)

            # TODO: the hard limit is set to app.memory and the
            #       reset_memory logic stays disabled until a proper
            #       solution for the cgroup race condition is implemented.
            #       The disabled logic called
            #       cgutils.reset_memory_limit_in_bytes(), wrote 'oom' into
            #       <app dir>/services/finished for every expunged app and
            #       killed the apps of its memory cgroup
            #       (cgutils.kill_apps_in_cgroup(..., delete_cgrp=False)).
            cgutils.set_memory_hardlimit(cgrp, memory_limit)

            # cpu settings
            #
            # Calculate the value of cpu shares for the app.
            #
            # [treadmill/apps/cpu.shares] = <total bogomips allocated to TM>
            #
            # [treadmill/apps/<app>/cpu.shares] = app.cpu * BMIPS_PER_CPU
            #
            cpu_fraction = utils.cpu_units(cpu_limit) / 100.
            app_cpu_shares = int(cpu_fraction * sysinfo.BMIPS_PER_CPU)

            log.info('created in cpu:%s with %s shares', cgrp, app_cpu_shares)
            cgroups.set_cpu_shares(cgrp, app_cpu_shares)

        return dict.fromkeys(self.SUBSYSTEMS, cgrp)
コード例 #12
0
ファイル: network_service.py プロジェクト: GrammaB/treadmill
    def on_create_request(self, rsrc_id, rsrc_data):
        """Allocate an IP and set up the veth pair of a resource.

        :param rsrc_id:
            Resource identifier (the application unique name).
        :param ``dict`` rsrc_data:
            Must provide ``environment``, which has to be one of the keys of
            ``_SET_BY_ENVIRONMENT``.
        :returns ``dict``:
            Network IP `vip`, network device `veth`, IP gateway `gateway`
            and the external IP `external_ip`.
        """
        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            log.debug('req: %r', rsrc_data)

            environment = rsrc_data['environment']

            assert environment in _SET_BY_ENVIRONMENT, \
                'Unknown environment: %r' % environment

            veth0, veth1 = _device_from_rsrc_id(rsrc_id)

            if rsrc_id in self._devices:
                # Re-read what IP we assigned before
                ip = self._devices[rsrc_id]['ip']
            else:
                # VIPs allocation (the owner is the resource link)
                ip = self._vips.alloc(rsrc_id)
                self._devices[rsrc_id] = {'ip': ip}

            if 'device' not in self._devices[rsrc_id]:
                # Create the interface pair
                netdev.link_add_veth(veth0, veth1)
                # Configure the links
                for veth in (veth0, veth1):
                    netdev.link_set_mtu(veth, self.ext_mtu)
                # Tag the interfaces
                for veth in (veth0, veth1):
                    netdev.link_set_alias(veth, rsrc_id)
                # Add interface to the bridge
                netdev.bridge_addif(self._TMBR_DEV, veth0)
                netdev.link_set_up(veth0)
                # We keep veth1 down until inside the container

            # Record the new device in our state
            self._devices[rsrc_id] = _device_info(veth0)
            self._devices[rsrc_id].update({
                'ip': ip,
                'environment': environment,
            })

            # We can now mark ip traffic as belonging to the requested
            # environment.
            _add_mark_rule(ip, environment)

        return {
            'vip': ip,
            'veth': veth1,
            'gateway': self._TM_IP,
            'external_ip': self.ext_ip,
        }
コード例 #13
0
    def _run(self, manifest):
        """Create the service's docker container and wait while it runs.

        The Zookeeper connection is stopped once the container part is over,
        whether it ended normally or with an exception.

        :param manifest:
            Application manifest; it is passed to the port allocation, to
            ``runtime.save_app`` and to the endpoint presence.
        :raises exc.ContainerSetupError:
            If docker reports that the image was not found.
        """
        context.GLOBAL.zk.conn.add_listener(zkutils.exit_on_lost)

        with lc.LogContext(_LOGGER, self._service.name,
                           lc.ContainerAdapter) as log:
            log.info('Running %r', self._service.directory)

            # The result is not used afterwards, only kept referenced.
            # NOTE(review): presumably this keeps the allocated ports
            # reserved for the lifetime of this call - confirm.
            _sockets = runtime.allocate_network_ports('0.0.0.0', manifest)

            app = runtime.save_app(manifest, self._service.data_dir)

            app_presence = presence.EndpointPresence(
                context.GLOBAL.zk.conn, manifest
            )
            app_presence.register_identity()
            app_presence.register_running()

            try:
                docker_client = self._get_client()

                try:
                    docker_container = _create_container(
                        self._tm_env, self._get_config(), docker_client, app
                    )
                except docker.errors.ImageNotFound:
                    raise exc.ContainerSetupError(
                        'Image {0} was not found'.format(app.image),
                        app_abort.AbortedReason.IMAGE
                    )

                docker_container.start()
                docker_container.reload()

                _LOGGER.info('Container is running.')
                app_presence.register_endpoints()
                running_event = events.ServiceRunningTraceEvent(
                    instanceid=app.name,
                    uniqueid=app.uniqueid,
                    service='docker'
                )
                appevents.post(self._tm_env.app_events_dir, running_event)

                # Refresh the status every 10s until the container stops.
                while docker_container.status == 'running':
                    docker_container.wait(timeout=10)
                    docker_container.reload()
            finally:
                _LOGGER.info('Stopping zookeeper.')
                context.GLOBAL.zk.conn.stop()
コード例 #14
0
    def get(self, rsrc_id, timeframe, as_json=False):
        """Return the rrd metrics.

        :param rsrc_id:
            Resource identifier, unpacked by ``self._unpack_id`` into the
            keyword arguments of ``self._get_rrd_file``.
        :param timeframe:
            Timeframe forwarded to ``rrdutils.get_json_metrics``; only used
            when ``as_json`` is true.
        :param as_json:
            If true return the JSON metrics, otherwise the rrd file itself.
        """
        with lc.LogContext(_LOGGER, rsrc_id):
            _LOGGER.info('Get metrics')
            rrd_file = self._get_rrd_file(**self._unpack_id(rsrc_id))

            if not as_json:
                return rrd_file

            return rrdutils.get_json_metrics(rrd_file, timeframe)
コード例 #15
0
ファイル: presence_service.py プロジェクト: GrammaB/treadmill
    def on_delete_request(self, rsrc_id):
        """Delete every presence path recorded for a resource.

        :param rsrc_id:
            Resource identifier; it must be a key of ``self.state``.
        :returns ``bool``:
            Always ``True``.
        :raises KeyError:
            If nothing is recorded in ``self.state`` for ``rsrc_id``.
        """
        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:

            log.info('Deleting presence: %s', rsrc_id)
            recorded_paths = self.state[rsrc_id]
            for node_path in recorded_paths:
                self._safe_delete(node_path)

            # The record is only dropped once all its paths were processed.
            del self.state[rsrc_id]

        return True
コード例 #16
0
ファイル: finish.py プロジェクト: vrautela/treadmill-workdir
    def finish(approot, runtime, container_dir, runtime_param):
        """Finish treadmill application on the node."""
        # Run with finish context as finish runs in cleanup.
        container_name = os.path.basename(container_dir)
        with lc.LogContext(_LOGGER, container_name,
                           lc.ContainerAdapter) as log:
            log.info('finish (approot %s)', approot)
            tm_env = appenv.AppEnvironment(approot)

            # A missing/empty runtime_param is treated as an empty list.
            param = utils.equals_list2dict(runtime_param or [])
            container_runtime = app_runtime.get_runtime(
                runtime, tm_env, container_dir, param
            )
            container_runtime.finish()
コード例 #17
0
    def monitor(manifest, container_dir, appevents_dir):
        """Monitor container services."""
        # NOTE(review): yaml.load without an explicit Loader is unsafe on
        # untrusted input and is rejected by recent PyYAML releases;
        # yaml.safe_load looks like the right call if the manifest only holds
        # plain YAML - confirm before changing.
        app = yaml.load(manifest.read())
        with lc.LogContext(_LOGGER, app['name'], lc.ContainerAdapter) as log:
            svc_presence = presence.ServicePresence(
                app, container_dir, appevents_dir
            )

            sys_dir = os.path.join(container_dir, 'sys')
            svc_sup_dir = os.path.join(sys_dir, 'start_container')

            failed_svc, killed = None, False

            # Check that start_container was not terminated. This fixes a
            # race condition where the presence exits and, while it is being
            # restarted, start_container is terminated.
            pid_file = os.path.join(svc_sup_dir, 'self.pid')
            svc_sup_ran_once = os.path.exists(pid_file)
            log.info('services supervisor ran once: %s', svc_sup_ran_once)
            svc_sup_down = presence.is_down(svc_sup_dir)
            log.info('services supervisor down: %s', svc_sup_down)

            if not (svc_sup_down and svc_sup_ran_once):
                svc_presence.ensure_supervisors_running()

                # Try to start the service, taking into account number of
                # restarts.
                # If the number of restarts is more than specified, delete app
                # from the model, which will trigger container shutdown.
                #
                # In case of container shutdown (application evicted from the
                # server), exit_app will not be called.
                while True:
                    started, failed_svc = svc_presence.start_all()
                    if not started:
                        break

                    svc_presence.wait_for_exit(svc_sup_dir)
                    if presence.is_down(svc_sup_dir):
                        log.info('Container services supervisor is down.')
                        failed_svc = None
                        killed = True
                        break
            else:
                log.info('services supervisor was terminated, exiting.')

            svc_presence.exit_app(failed_svc, killed=killed)

            log.info('Shutting down sys supervisor.')
            subproc.call(['s6-svscanctl', '-pi', sys_dir])
コード例 #18
0
ファイル: runtime.py プロジェクト: linbai/treadmill
    def _run(self, manifest):
        """Create and start the service's docker container.

        :param manifest:
            Application manifest. Its ``ephemeral_ports`` lists are reset
            and it is updated with the container's network information
            before being saved again for the container.
        :raises exc.ContainerSetupError:
            If docker reports that the image was not found.
        """
        context.GLOBAL.zk.conn.add_listener(zkutils.exit_on_lost)

        with lc.LogContext(_LOGGER, self._service.name,
                           lc.ContainerAdapter) as log:
            log.info('Running %r', self._service.directory)

            for proto in ('tcp', 'udp'):
                manifest['ephemeral_ports'][proto] = []

            _create_docker_log_symlink(self._service.data_dir)

            app = runtime.save_app(manifest, self._service.data_dir)

            volume_mapping = self._get_volume_mapping()

            app_presence = presence.EndpointPresence(
                context.GLOBAL.zk.conn, manifest
            )
            app_presence.register_identity()
            app_presence.register_running()

            docker_client = self._get_client()

            try:
                docker_container = _create_container(
                    self._tm_env, self._get_config(), docker_client, app,
                    volume_mapping
                )
            except docker.errors.ImageNotFound:
                raise exc.ContainerSetupError(
                    'Image {0} was not found'.format(app.image),
                    app_abort.AbortedReason.IMAGE)

            docker_container.start()
            docker_container.reload()

            _update_network_info_in_manifest(docker_container, manifest)
            # needs to share manifest with container: it is saved under the
            # first key of the volume mapping.
            if volume_mapping:
                container_data_dir = next(iter(volume_mapping))
                runtime.save_app(manifest,
                                 container_data_dir,
                                 app_json='app.json')

            _LOGGER.info('Container is running.')
            app_presence.register_endpoints()
            running_event = events.ServiceRunningTraceEvent(
                instanceid=app.name,
                uniqueid=app.uniqueid,
                service='docker'
            )
            trace.post(self._tm_env.app_events_dir, running_event)

            _print_container_logs(docker_container)
コード例 #19
0
def finish(tm_env, zkclient, container_dir, watchdog):
    """Free the resources allocated to a container and mark them available.

    :param tm_env:
        Treadmill environment; its ``app_events_dir`` receives the events.
    :param zkclient:
        Zookeeper client handed to ``_cleanup``.
    :param container_dir:
        Container directory; it is deleted at the end.
    :param watchdog:
        Watchdog removed once the cleanup went through.
    """
    container_name = os.path.basename(container_dir)
    with lc.LogContext(_LOGGER, container_name,
                       lc.ContainerAdapter) as log:
        log.info('finishing %r', container_dir)

        _stop_container(container_dir)

        # Check if application reached restart limit inside the container.
        #
        # The container directory will be moved, this check is done first.
        #
        # If restart limit was reached, application node will be removed from
        # Zookeeper at the end of the cleanup process, indicating to the
        # scheduler that the server is ready to accept new load.
        exitinfo, aborted, aborted_reason = _collect_exit_info(container_dir)

        app = runtime.load_app(container_dir)
        if not app:
            # Fall back to the APP_JSON variant of the application file.
            app = runtime.load_app(container_dir, appcfg.APP_JSON)
        else:
            _cleanup(tm_env, zkclient, container_dir, app)

        if app:
            # All resources are cleaned up. If the app terminated inside the
            # container, remove the node from Zookeeper, which will notify the
            # scheduler that it is safe to reuse the host for other load.
            if aborted:
                appevents.post(
                    tm_env.app_events_dir,
                    events.AbortedTraceEvent(
                        instanceid=app.name,
                        why=None,  # TODO(boysson): extract this info
                        payload=aborted_reason
                    )
                )

            if exitinfo:
                _post_exit_event(tm_env, app, exitinfo)

            # cleanup monitor with container information
            apphook.cleanup(tm_env, app)

        # Delete the app directory (this includes the tarball, if any)
        shutil.rmtree(container_dir)

        # cleanup was successful, remove the watchdog
        watchdog.remove()
        log.logger.info('Finished cleanup: %s', container_dir)
コード例 #20
0
def finish(tm_env, container_dir):
    """Free the resources allocated to a container and mark them available.

    :param tm_env:
        Treadmill environment handed to the cleanup and reporting helpers.
    :param container_dir:
        Container directory; the application data is read from its ``data``
        sub-directory.
    """
    with lc.LogContext(_LOGGER, os.path.basename(container_dir),
                       lc.ContainerAdapter):
        _LOGGER.info('finishing %r', container_dir)

        data_dir = os.path.join(container_dir, 'data')

        app = runtime.load_app(data_dir)
        if app is None:
            # Fall back to the APP_JSON variant of the application file.
            app = runtime.load_app(data_dir, appcfg.APP_JSON)
        else:
            _cleanup(tm_env, data_dir, app)

        if app is not None:
            # Check if application reached restart limit inside the container.
            #
            # The container directory will be moved, this check is done first.
            #
            # If restart limit was reached, application node will be removed
            # from Zookeeper at the end of the cleanup process, indicating to
            # the scheduler that the server is ready to accept new load.
            exitinfo, aborted, oom, terminated = _collect_exit_info(data_dir)

            # All resources are cleaned up. If the app terminated inside the
            # container, remove the node from Zookeeper, which will notify the
            # scheduler that it is safe to reuse the host for other load.
            if aborted is not None:
                app_abort.report_aborted(tm_env,
                                         app.name,
                                         why=aborted.get('why'),
                                         payload=aborted.get('payload'))

            elif oom:
                _post_oom_event(tm_env, app)

            elif terminated:
                # Terminated (or evicted).
                # Don't post event, this is handled by the master.
                _LOGGER.info('Terminated: %s', app.name)

            elif exitinfo is not None:
                # Container finished because service exited.
                # It is important that this is checked last.
                _post_exit_event(tm_env, app, exitinfo)

        # cleanup monitor with container information
        if app:
            apphook.cleanup(tm_env, app, container_dir)
コード例 #21
0
    def on_delete_request(self, rsrc_id):
        """Delete the cgroups that belong to a resource.

        :param rsrc_id:
            Resource identifier; it is also the leaf name of the cgroup under
            the apps group.
        :returns ``bool``:
            Always ``True``.
        """
        cgrp = os.path.join(
            cgutils.apps_group_name(self._cgroup_prefix),
            rsrc_id
        )

        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            # The OOM handler is dropped first, then the cgroup is removed
            # from every subsystem.
            self._unregister_oom_handler(cgrp)

            log.info('Deleting cgroups: %s:%s', self.SUBSYSTEMS, cgrp)
            for subsys in self.SUBSYSTEMS:
                cgutils.delete(subsys, cgrp)

        return True
コード例 #22
0
    def put(self, rsrc_id, rsrc_data):
        """Request creation/update of a resource.

        The request data is written to the request directory. If no request
        UUID file exists yet, a new request is registered with the service
        and its UUID is recorded; otherwise the service is notified of an
        update for the recorded UUID.

        :param `str` rsrc_id:
            Unique identifier for the requested resource.
        :param `str` rsrc_data:
            (New) Parameters for the requested resource.
        :raises IOError:
            If the UUID file cannot be read for a reason other than it not
            existing.
        """
        req_dir = self._req_dirname(rsrc_id)
        fs.mkdir_safe(req_dir)

        with io.open(os.path.join(req_dir, REQ_FILE), 'w') as f:
            if os.name == 'posix':
                os.fchmod(f.fileno(), 0o644)
            yaml.dump(rsrc_data,
                      explicit_start=True,
                      explicit_end=True,
                      default_flow_style=False,
                      stream=f)

        req_uuid_file = os.path.join(req_dir, self._REQ_UID_FILE)
        try:
            with io.open(req_uuid_file) as f:
                svc_req_uuid = f.read().strip()
        except IOError as err:
            # A missing UUID file simply means this is a new request.
            if err.errno == errno.ENOENT:
                svc_req_uuid = None
            else:
                raise

        with lc.LogContext(_LOGGER, rsrc_id):
            if svc_req_uuid is None:
                try:
                    # New request
                    svc_req_uuid = self._serviceinst.clt_new_request(
                        rsrc_id, req_dir)
                    # Write down the UUID
                    with io.open(req_uuid_file, 'w') as f:
                        f.write(svc_req_uuid)
                        # Same guard as for the request file above:
                        # os.fchmod does not exist on every platform, and the
                        # resulting AttributeError would escape the OSError
                        # handler below.
                        if os.name == 'posix':
                            os.fchmod(f.fileno(), 0o644)

                except OSError:
                    # Error registration failed, delete the request.
                    _LOGGER.exception('Unable to submit request')
                    fs.rmtree_safe(req_dir)

            else:
                self._serviceinst.clt_update_request(svc_req_uuid)
コード例 #23
0
    def on_create_request(self, rsrc_id, rsrc_data):
        """Register the running, endpoint and identity nodes of a resource.

        Every node that was created is recorded in ``self.presence`` under
        the application name.

        :param rsrc_id:
            Resource identifier, from which the application name is derived.
        :param ``dict`` rsrc_data:
            Optional keys read here: ``endpoints``, ``identity_group`` and
            ``identity``.
        :returns:
            ``{}`` once every node was created, or ``None`` as soon as one
            ``_safe_create`` call reports a failure.
        """
        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            log.info('Creating presence: %s', rsrc_data)
            app_name = appcfg.app_name(rsrc_id)

            # Register running.
            running_path = z.path.running(app_name)
            _LOGGER.info('Register running: %s, %s',
                         running_path, self.hostname)
            if not self._safe_create(rsrc_id, running_path, self.hostname):
                _LOGGER.info('Waiting to expire: %s', running_path)
                return None

            self.presence[app_name][running_path] = rsrc_id

            # Register endpoints.
            for endpoint in rsrc_data.get('endpoints', []):
                internal_port = endpoint['port']
                # The endpoint name defaults to the internal port number.
                ep_name = endpoint.get('name', str(internal_port))
                ep_port = endpoint['real_port']
                ep_proto = endpoint.get('proto', 'tcp')

                hostport = self.hostname + ':' + str(ep_port)

                ep_path = z.path.endpoint(app_name, ep_proto, ep_name)
                _LOGGER.info('Register endpoint: %s, %s', ep_path, hostport)
                if not self._safe_create(rsrc_id, ep_path, hostport):
                    _LOGGER.info('Waiting to expire: %s', ep_path)
                    return None

                self.presence[app_name][ep_path] = rsrc_id

            # Register identity.
            identity_group = rsrc_data.get('identity_group')
            if identity_group:
                identity = rsrc_data.get('identity', _INVALID_IDENTITY)
                identity_data = {'host': self.hostname, 'app': app_name}

                id_path = z.path.identity_group(identity_group, str(identity))
                _LOGGER.info('Register identity: %s, %s',
                             id_path, identity_data)
                if not self._safe_create(rsrc_id, id_path, identity_data):
                    _LOGGER.info('Waiting to expire: %s', id_path)
                    return None

                self.presence[app_name][id_path] = rsrc_id

        return {}
コード例 #24
0
    def _configure(self, instance_name):
        """Configure an instance from its cached event and link it as running.

        - Runs app_configure --approot <rootdir> cache/<instance>

        On failure the cached event file is removed; when the failure is an
        exception, the instance is also reported as aborted.

        :param ``str`` instance_name:
            Name of the instance to configure
        :returns ``bool``:
            True for successfully configured container.
        """
        event_file = os.path.join(self.tm_env.cache_dir, instance_name)

        with lc.LogContext(_LOGGER, instance_name):
            try:
                _LOGGER.info('Configuring')
                container_dir = app_cfg.configure(self.tm_env, event_file,
                                                  self._runtime)
                if container_dir is not None:
                    # symlink_safe(link, target)
                    running_link = os.path.join(self.tm_env.running_dir,
                                                instance_name)
                    fs.symlink_safe(running_link, container_dir)
                    return True

                # configure step failed, skip.
                fs.rm_safe(event_file)
                return False

            except exc.ContainerSetupError as err:  # pylint: disable=W0703
                # Setup errors carry their own abort reason.
                _LOGGER.exception('Error configuring (%r)', instance_name)
                app_abort.report_aborted(self.tm_env, instance_name,
                                         why=err.reason,
                                         payload=traceback.format_exc())
                fs.rm_safe(event_file)
                return False
            except Exception:  # pylint: disable=W0703
                _LOGGER.exception('Error configuring (%r)', instance_name)
                app_abort.report_aborted(self.tm_env, instance_name,
                                         why=app_abort.AbortedReason.UNKNOWN,
                                         payload=traceback.format_exc())
                fs.rm_safe(event_file)
                return False
コード例 #25
0
    def _on_deleted(self, impl, filepath):
        """Private handler for request deletion events.

        :param impl:
            Service implementation; its ``on_delete_request`` is called with
            the request id.
        :param ``str`` filepath:
            Path of the deleted request; its basename is the request id.
        :returns:
            ``None`` for ignored paths, otherwise whatever
            ``impl.on_delete_request`` returns.
        """
        req_id = os.path.basename(filepath)

        # Avoid triggering on temporary files. An empty basename (e.g. a path
        # ending with a separator) is not a request either, and indexing it
        # would raise IndexError.
        if not req_id or req_id.startswith('.'):
            return None

        # TODO: We should also validate the req_id format
        with lc.LogContext(_LOGGER, req_id,
                           adapter_cls=lc.ContainerAdapter) as log:

            log.debug('deleted %r', req_id)
            res = impl.on_delete_request(req_id)

        return res
コード例 #26
0
ファイル: scheduler.py プロジェクト: sattvic108/treadmill
def _explain(inst_id):
    """Explain application placement.

    Looks the instance up in the cell of a read-only scheduler and hands it
    to ``reports.explain_placement``.

    :param inst_id:
        Key of the instance in the scheduler cell's ``apps`` mapping.
    :returns:
        Whatever ``reports.explain_placement`` returns.
    :raises exc.NotFoundError:
        When ``inst_id`` is not present in the cell's apps.
    :raises exc.FoundError:
        When the instance already has a server assigned.
    """
    with lc.LogContext(_LOGGER, inst_id):
        # Time how long it takes to obtain the read-only scheduler.
        started_at = time.time()
        scheduler = get_readonly_scheduler()
        elapsed = time.time() - started_at
        _LOGGER.info('ro_scheduler was ready in %s secs', elapsed)

        try:
            app = scheduler.cell.apps[inst_id]
        except KeyError:
            raise exc.NotFoundError(inst_id)

        # An instance that already has a server needs no explanation.
        if instance_server(app) if False else app.server:
            message = 'instance {} is already placed on {}'.format(
                inst_id, app.server)
            raise exc.FoundError(message)

        return reports.explain_placement(scheduler.cell, app, 'servers')
コード例 #27
0
    def get(self, log_id, start=0, limit=None, order=None):
        """Get log file.

        :param `str` log_id:
            Identifier made of exactly four ``/``-separated parts:
            ``instance/uniq/logtype/component``.
        :param start:
            Index of the first line to return; must not be negative.
        :param limit:
            Number of items requested; a falsy value is logged as ``all``.
        :param order:
            ``'desc'`` reads the fragment in reverse; any other value reads
            it in file order.
        :returns:
            The result of ``_fragment_in_reverse`` or ``_fragment`` applied
            to the opened log file.
        :raises ValueError:
            When ``log_id`` does not split into exactly four parts.
        :raises exc.InvalidInputError:
            When ``start`` is negative.
        """
        instance, uniq, logtype, component = log_id.split('/')
        with lc.LogContext(_LOGGER, '{}/{}'.format(instance, uniq)):
            log_f = self._get_logfile(instance, uniq, logtype, component)

            # Pass the values as logging arguments so the message is only
            # rendered when the record is actually emitted.
            _LOGGER.info('Requested %s items starting from line %s '
                         'in %s order', limit or 'all', start, order)

            if start is not None and start < 0:
                raise exc.InvalidInputError(
                    __name__,
                    'Index cannot be less than 0, got: {}'.format(start))

            with open(log_f) as log:
                if order == 'desc':
                    return _fragment_in_reverse(log, start, limit)

                return _fragment(log, start, limit)
コード例 #28
0
    def on_delete_request(self, rsrc_id):
        """Drop every presence entry recorded for ``rsrc_id``.

        :param rsrc_id:
            Resource identifier; the application name is derived from it
            with ``appcfg.app_name``.
        :returns:
            ``True`` once the entries have been removed.
        """
        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            log.info('Deleting presence')
            app_name = appcfg.app_name(rsrc_id)

            # Snapshot the matching paths first: the mapping is mutated in
            # the loop below and must not be iterated at the same time.
            owned_paths = [
                node
                for node, owner in self.presence[app_name].items()
                if owner == rsrc_id
            ]

            for node in owned_paths:
                self._safe_delete(node)
                del self.presence[app_name][node]

            # Forget the application entirely once nothing is left for it.
            if not self.presence[app_name]:
                del self.presence[app_name]

        return True
コード例 #29
0
    def delete(self, rsrc_id):
        """Remove a previously requested resource.

        Reads the request UUID stored in the resource's request directory,
        asks the service instance to delete that request and renames the
        request directory to its backup name.

        :param `str` rsrc_id:
            Unique identifier for the requested resource.
        :returns:
            ``None``. When the UUID file is missing a warning is logged and
            nothing else is done.
        """
        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            request_dir = self._req_dirname(rsrc_id)
            uid_path = os.path.join(request_dir, self._REQ_UID_FILE)
            try:
                with io.open(uid_path) as uid_file:
                    request_uuid = uid_file.read().strip()
            except IOError as err:
                # Only a missing file is tolerated; anything else propagates.
                if err.errno != errno.ENOENT:
                    raise
                log.warning('Resource %r does not exist', rsrc_id)
                return None

            self._serviceinst.clt_del_request(request_uuid)
            backup_dir = self._bck_dirname(request_uuid)
            os.rename(request_dir, backup_dir)
コード例 #30
0
def _get_tickets(appname, app, container_dir):
    """Get tickets.

    Requests tickets for ``appname``, stores any reply in the container's
    ticket spool directory (``root/var/spool/tickets`` under
    ``container_dir``) and then checks the ticket file of every principal
    listed under the ``tickets`` key of ``app``.

    :param appname:
        Application name the tickets are requested for; also used as the
        logging context.
    :param app:
        Mapping-like object; its optional ``tickets`` entry lists the
        principals to verify.
    :param container_dir:
        Container directory under which the spool directory is located.
    :raises exc.ContainerSetupError:
        When ``tickets.krbcc_ok`` rejects the ticket file of a principal.
    """
    # Pass the adapter class by keyword, like the other LogContext users.
    with lc.LogContext(_LOGGER, appname,
                       adapter_cls=lc.ContainerAdapter) as log:
        tkts_spool_dir = os.path.join(container_dir, 'root', 'var', 'spool',
                                      'tickets')

        reply = tickets.request_tickets(context.GLOBAL.zk.conn, appname)
        if reply:
            tickets.store_tickets(reply, tkts_spool_dir)

        # Check that all requested tickets are valid.
        for princ in app.get('tickets', []):
            krbcc_file = os.path.join(tkts_spool_dir, princ)
            if not tickets.krbcc_ok(krbcc_file):
                log.error('Missing or expired tickets: %s, %s', princ,
                          krbcc_file)
                raise exc.ContainerSetupError('tickets.%s' % princ)

            # Use the context logger for the success path as well, so both
            # outcomes are reported through the same adapter.
            log.info('Ticket ok: %s, %s', princ, krbcc_file)