Example #1
0
    def boot(approot, runtime,
             core_cpu_shares,
             core_cpuset_cpus, apps_cpuset_cpus,
             core_memory_limit,
             preserve_mounts):
        """Treadmill boot process.

        Initializes the app environment under ``approot``, cleans up
        mounts, calls ``_cgroup_init`` with the core/apps resource settings
        and finally runs ``s6_svscan`` on the environment's init dir via
        ``subproc.safe_exec``.

        ``preserve_mounts`` is a comma-separated string of paths that are
        passed to the mount cleanup together with the environment root.
        """
        _LOGGER.info('Initializing Treadmill: %s (%s)', approot, runtime)

        env = appenv.AppEnvironment(approot)
        env.initialize(None)

        # Keep everything mounted below the install root (plugins may have
        # mounted things there) plus the paths given on the command line.
        keep_mounts = [env.root + '*']
        keep_mounts += preserve_mounts.split(',')
        fs_linux.cleanup_mounts(keep_mounts)

        _cgroup_init(core_cpu_shares,
                     core_cpuset_cpus, apps_cpuset_cpus,
                     core_memory_limit)

        svscan_cmd = ['s6_svscan', '-s', env.init_dir]
        subproc.safe_exec(svscan_cmd)
Example #2
0
def create_endpoint_file(approot, port, appname, endpoint):
    """Create and link local endpoint file.

    The TCP endpoint node is deleted and re-created in Zookeeper with
    ``hostname:port`` as its value. Afterwards any existing local spec for
    the same app/endpoint is unlinked and a new one is created for the
    current process.
    """
    hostport = '%s:%s' % (sysinfo.hostname(), port)
    zkclient = context.GLOBAL.zk.conn

    # The per-proid endpoint node must exist before the endpoint is put.
    proid_path = z.path.endpoint_proid(appname)
    servers_acl = zkclient.make_servers_acl()
    _LOGGER.info('Ensuring %s exists with ACL %r', proid_path, servers_acl)
    zkutils.ensure_exists(zkclient, proid_path, acl=[servers_acl])

    node_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', node_path, hostport)

    # The endpoint has to be deleted and created again so that discovery
    # picks it up in case of a master restart.
    zkutils.ensure_deleted(zkclient, node_path)
    time.sleep(5)
    zkutils.put(zkclient, node_path, hostport)

    env = appenv.AppEnvironment(approot)
    mgr = endpoints.EndpointsMgr(env.endpoints_dir)
    mgr.unlink_all(appname=appname, endpoint=endpoint, proto='tcp')
    mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )
Example #3
0
 def __init__(self, root, runtime, runtime_param=None):
     """Build the app environment and remember the runtime settings.

     ``runtime`` and ``runtime_param`` are stored unchanged on the
     instance; ``_is_active`` starts out as ``False``.
     """
     _LOGGER.info(
         'init appcfgmgr: %s, %s, %s', root, runtime, runtime_param
     )
     self.tm_env = appenv.AppEnvironment(root=root)
     self._runtime, self._runtime_param = runtime, runtime_param
     self._is_active = False
    def test_get_spec(self):
        """Test get endpoint spec with partial pattern match.

        A spec must be found by proto alone, by endpoint alone and by
        both, and all three lookups must agree.
        """
        env = appenv.AppEnvironment(root=self.root)
        mgr = endpoints.EndpointsMgr(env.endpoints_dir)

        # pylint: disable=W0212
        # No spec has been created yet.
        self.assertIsNone(mgr.get_spec())

        spec_attrs = {
            'appname': 'appname##0000000001',
            'proto': 'tcp',
            'endpoint': 'nodeinfo',
            'real_port': 12345,
            'pid': 5213,
            'port': 8000,
            'owner': None,
        }
        mgr.create_spec(**spec_attrs)

        self.assertIsNotNone(mgr.get_spec(proto='tcp'))

        by_proto = mgr.get_spec(proto='tcp')
        by_endpoint = mgr.get_spec(endpoint='nodeinfo')
        self.assertEqual(by_proto, by_endpoint)

        by_proto = mgr.get_spec(proto='tcp')
        by_both = mgr.get_spec(proto='tcp', endpoint='nodeinfo')
        self.assertEqual(by_proto, by_both)
Example #5
0
    def run(approot, runtime, container_dir):
        """Runs container given a container dir.

        Any exception raised while the termination flag is not set causes
        the app to be flagged as aborted; exceptions raised once the flag
        is set are only logged.
        """
        log_name = os.path.basename(container_dir)
        with lc.LogContext(_LOGGER, log_name, lc.ContainerAdapter) as log:
            # Intercept SIGTERM from s6 supervisor, so that initialization
            # is not left in broken state.
            terminated = utils.make_signal_flag(utils.term_signal())
            tm_env = None
            try:
                log.info('run %r %r', approot, container_dir)
                tm_env = appenv.AppEnvironment(approot)

                container_runtime = app_runtime.get_runtime(
                    runtime, tm_env, container_dir
                )
                container_runtime.run(terminated)

                # If we reach here, the application was terminated.

            except Exception as err:  # pylint: disable=W0703
                if terminated:
                    log.logger.info('Exception while handling term, ignore.',
                                    exc_info=True)
                else:
                    log.critical('Failed to start, app will be aborted.',
                                 exc_info=True)
                    app_abort.flag_aborted(tm_env, container_dir, err)
 def run(approot, instance):
     """Starts discovery publisher process."""
     env = appenv.AppEnvironment(approot)
     # Built from the local endpoints dir and the global Zookeeper
     # connection.
     endpoint_publisher = endpoints.EndpointPublisher(
         env.endpoints_dir,
         context.GLOBAL.zk.conn,
         instance=instance,
     )
     endpoint_publisher.run()
Example #7
0
    def register_cmd(approot, refresh_interval, manifest, container_dir):
        """Register container presence.

        After registration the command loops forever, sleeping
        ``refresh_interval`` between iterations and refreshing tickets when
        ``_get_tickets`` reported that a refresh is needed. The Zookeeper
        connection is stopped when the function is left.
        """
        try:
            _LOGGER.info('Configuring sigterm handler.')
            signal.signal(utils.term_signal(), sigterm_handler)

            env = appenv.AppEnvironment(approot)
            app = app_manifest.read(manifest)

            endpoint_presence = presence.EndpointPresence(
                context.GLOBAL.zk.conn, app
            )

            # If tickets are not ok, app will be aborted.
            #
            # If tickets acquired successfully, services will start, and
            # tickets will be refreshed after each interval.
            need_refresh = False
            try:
                endpoint_presence.register()
                need_refresh = _get_tickets(app, container_dir)
                _start_service_sup(env, app, container_dir)
            except exc.ContainerSetupError as err:
                app_abort.abort(
                    container_dir,
                    why=err.reason,
                    payload=traceback.format_exc(),
                )

            while True:
                # Need to sleep anyway even if not refreshing tickets.
                time.sleep(refresh_interval)
                if not need_refresh:
                    continue
                _refresh_tickets(app, container_dir)
        finally:
            _LOGGER.info('Stopping zookeeper.')
            context.GLOBAL.zk.conn.stop()
Example #8
0
    def test_update_app_rrds(self):
        """Test update container rrds.

        Two apps are fed in: one with only a timestamp and one with a full
        set of cgroup readings. An rrd must be created for both, and an
        update with the derived metrics is expected for the full one.
        """
        timestamp_only = {'timestamp': 1}
        full_readings = {
            'timestamp': 3,
            'memory.usage_in_bytes': 10,
            'memory.soft_limit_in_bytes': 10,
            'memory.limit_in_bytes': 10,
            'cpuacct.usage': 3000000000,
            'cpu.shares': 1024,
            'blkio.throttle.io_service_bytes': {
                '3:0': {'Read': 5, 'Write': 3},
            },
            'blkio.throttle.io_serviced': {
                '3:0': {'Read': 5, 'Write': 3},
            },
            'fs.used_bytes': 10,
        }
        data = {
            'foo.bar-00001-KKmc7hBHskLWh': timestamp_only,
            'foo.bar-00002-KKmc7hBHskLWj': full_readings,
        }

        rrdclient = mock.Mock()
        tm_env = appenv.AppEnvironment(self.root)
        # pylint: disable=W0212
        metrics._update_app_rrds(data, self.root, rrdclient, 5, tm_env)

        full_rrd = '{}/foo.bar-00002-KKmc7hBHskLWj.rrd'.format(self.root)
        bare_rrd = '{}/foo.bar-00001-KKmc7hBHskLWh.rrd'.format(self.root)

        expected_creates = [
            mock.call(full_rrd, 5, 10),
            mock.call(bare_rrd, 5, 10),
        ]
        rrdclient.create.assert_has_calls(expected_creates, any_order=True)

        metrics_data = {
            'timestamp': 3,
            'memusage': 10,
            'softmem': 10,
            'hardmem': 10,
            'cputotal': 3000000000,
            'cpuusage': 0.005,
            'cpuusage_ratio': 0.000244140625,
            'blk_read_bps': 5,
            'blk_write_bps': 3,
            'blk_read_iops': 5,
            'blk_write_iops': 3,
            'fs_used_bytes': 10,
        }
        expected_updates = [
            mock.call(full_rrd, metrics_data, metrics_time=3),
        ]
        rrdclient.update.assert_has_calls(expected_updates)
Example #9
0
    def finish(approot, runtime, container_dir):
        """Finish treadmill application on the node."""
        log_name = os.path.basename(container_dir)
        with lc.LogContext(_LOGGER, log_name, lc.ContainerAdapter) as log:
            log.info('finish (approot %s)', approot)

            env = appenv.AppEnvironment(approot)
            # The selected runtime implementation does the actual work.
            container_runtime = app_runtime.get_runtime(
                runtime, env, container_dir
            )
            container_runtime.finish()
Example #10
0
    def cleanup_instance(approot, runtime, instance, runtime_param):
        """Actually do the cleanup of the instance.

        ``runtime_param`` may be empty or ``None``; it is converted with
        ``utils.equals_list2dict`` before being handed to the cleaner.
        """
        runtime_options = utils.equals_list2dict(runtime_param or [])

        env = appenv.AppEnvironment(root=approot)
        cleanup.Cleanup(env).invoke(runtime, instance, runtime_options)
Example #11
0
    def run(approot, config_dir):
        """Runs monitor.

        The app environment is only built when ``approot`` is truthy;
        otherwise the monitor is given ``tm_env=None``.
        """
        tm_env = appenv.AppEnvironment(root=approot) if approot else None

        monitor.Monitor(tm_env=tm_env, config_dir=config_dir).run()
Example #12
0
    def server(approot, register, port, auth, modules, title, cors_origin):
        """Runs nodeinfo server.

        When ``port`` is 0 a free port is picked by binding a temporary
        socket. When ``register`` is set, the server endpoint is registered
        as an ephemeral node in Zookeeper and a local endpoint spec is
        written for the current process. Privileges are dropped before the
        REST server is started.
        """
        if port == 0:
            # Let the OS pick a free port. The probe socket is always closed,
            # even if bind()/getsockname() fails, so the descriptor is not
            # leaked.
            # NOTE: the port is released here and only bound again by the
            # REST server below, so another process could claim it in
            # between.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.bind(('0.0.0.0', 0))
                port = sock.getsockname()[1]
            finally:
                sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        if register:
            zkclient = context.GLOBAL.zk.conn
            zkclient.add_listener(zkutils.exit_on_lost)

            appname = 'root.%s#%010d' % (hostname, os.getpid())
            app_pattern = 'root.%s#*' % (hostname)
            path = z.path.endpoint(appname, 'tcp', 'nodeinfo')
            _LOGGER.info('register endpoint: %s %s', path, hostport)
            zkutils.create(zkclient, path, hostport,
                           acl=[zkclient.make_servers_acl()],
                           ephemeral=True)

            # TODO: remove "legacy" endpoint registration once conversion is
            #       complete.
            tm_env = appenv.AppEnvironment(approot)
            # TODO: need to figure out how to handle windows.
            assert os.name != 'nt'
            endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
            # Drop specs matching this host's app pattern before writing the
            # one for the current pid.
            endpoints_mgr.unlink_all(
                app_pattern, endpoint='nodeinfo', proto='tcp'
            )
            endpoints_mgr.create_spec(
                appname=appname,
                endpoint='nodeinfo',
                proto='tcp',
                real_port=port,
                pid=os.getpid(),
                port=port,
                owner='/proc/{}'.format(os.getpid()),
            )

        _LOGGER.info('Starting nodeinfo server on port: %s', port)

        utils.drop_privileges()

        # API modules are only initialized when some were requested;
        # otherwise the server runs with no protected paths.
        api_paths = []
        if modules:
            api_modules = {module: None for module in modules}
            api_paths = api.init(
                api_modules,
                title.replace('_', ' '),
                cors_origin
            )

        rest_server = rest.TcpRestServer(port, auth_type=auth,
                                         protect=api_paths)
        rest_server.run()
Example #13
0
 def node_services(approot, scan_dir):
     """Setup a node services monitor enforcing restart policies.

     Only ``scan_dir`` is watched (no explicit service dirs); the down
     action is ``monitor.MonitorNodeDown`` built on the app environment.
     """
     env = appenv.AppEnvironment(root=approot)
     services_monitor = monitor.Monitor(
         scan_dirs=scan_dir,
         service_dirs=None,
         policy_impl=monitor.MonitorRestartPolicy,
         down_action=monitor.MonitorNodeDown(env),
     )
     services_monitor.run()
Example #14
0
        def tm_env(_metrics_api=None):
            """Lazy instantiate app environment.

            The environment is built from ``TREADMILL_APPROOT`` the first
            time it is needed and cached on ``self._tm_env``.
            """
            if self._tm_env:
                return self._tm_env

            # TODO: we need to pass this parameter to api, unfortunately
            #       in current api framework it is not trivial.
            root = os.environ['TREADMILL_APPROOT']
            _LOGGER.info('Using approot: %s', root)
            self._tm_env = appenv.AppEnvironment(root)
            return self._tm_env
    def alert_monitor_cmd(approot, plugin, max_queue_length, wait_interval):
        """Publish alerts.

        Watches the environment's alerts dir and handles newly created
        entries with a handler built from the alert backend ``plugin``.
        """
        env = appenv.AppEnvironment(root=approot)
        dir_watcher = dirwatch.DirWatcher(env.alerts_dir)

        backend = _load_alert_backend(plugin)
        dir_watcher.on_created = _get_on_create_handler(backend)

        _serve_forever(
            dir_watcher, env.alerts_dir, max_queue_length, wait_interval
        )
Example #16
0
 def containers(approot, scan_dir):
     """Setup a monitor for the running containers.

     Only ``scan_dir`` is watched; the down action is
     ``monitor.MonitorContainerCleanup`` built on the app environment.
     """
     env = appenv.AppEnvironment(root=approot)
     containers_monitor = monitor.Monitor(
         scan_dirs=scan_dir,
         service_dirs=None,
         policy_impl=monitor.MonitorRestartPolicy,
         down_action=monitor.MonitorContainerCleanup(env),
     )
     containers_monitor.run()
    def run(approot, scan_interval, instance):
        """Starts portscan process."""
        _LOGGER.info('Staring portscan: scan interval: %d', scan_interval)

        env = appenv.AppEnvironment(approot)
        # Built from the local endpoints dir and the global Zookeeper
        # connection.
        port_scanner = endpoints.PortScanner(
            env.endpoints_dir,
            context.GLOBAL.zk.conn,
            scan_interval=scan_interval,
            instance=instance,
        )
        port_scanner.run()
Example #18
0
 def services(approot, container_dir, service_dir):
     """Setup a services monitor enforcing restart policies.

     Only ``service_dir`` is monitored (no scan dirs). The down action is
     ``monitor.MonitorContainerDown`` for ``container_dir`` and events go
     through ``monitor.PresenceMonitorEventHook``.
     """
     env = appenv.AppEnvironment(root=approot)
     services_monitor = monitor.Monitor(
         scan_dirs=None,
         service_dirs=service_dir,
         policy_impl=monitor.MonitorRestartPolicy,
         down_action=monitor.MonitorContainerDown(container_dir),
         event_hook=monitor.PresenceMonitorEventHook(env),
     )
     services_monitor.run()
Example #19
0
        def setUp(self, mock_resource_service):
            """Create a temp app root and stub out the resource services."""
            # W0221 Arguments number differs from overridden method
            # pylint: disable=W0221
            def _make_service_mock(impl, *_args, **_kw_args):
                """Return a fresh mock named after the service implementation.
                """
                return mock.Mock(name=impl)

            # Each call to the patched factory yields its own mock.
            mock_resource_service.side_effect = _make_service_mock

            self.root = tempfile.mkdtemp()
            self.tm_env = appenv.AppEnvironment(root=self.root)
Example #20
0
    def finish(approot, runtime, container_dir, runtime_param):
        """Finish treadmill application on the node.

        ``runtime_param`` may be empty or ``None``; it is converted with
        ``utils.equals_list2dict`` and passed to the runtime.
        """
        log_name = os.path.basename(container_dir)
        # Run with finish context as finish runs in cleanup.
        with lc.LogContext(_LOGGER, log_name, lc.ContainerAdapter) as log:
            log.info('finish (approot %s)', approot)

            env = appenv.AppEnvironment(approot)
            runtime_options = utils.equals_list2dict(runtime_param or [])

            container_runtime = app_runtime.get_runtime(
                runtime, env, container_dir, runtime_options
            )
            container_runtime.finish()
Example #21
0
    def top(no_lock, api, approot, once):
        """Sync LDAP data with Zookeeper data.

        Runs ``_run_sync`` against the environment's alerts dir, holding a
        Zookeeper election lock unless ``no_lock`` is set.
        """
        # NOTE(review): the summary mentions LDAP, but the code only passes
        # the alerts dir to _run_sync -- confirm the docstring is accurate.
        env = appenv.AppEnvironment(root=approot)

        if no_lock:
            _LOGGER.info('Running without lock.')
            _run_sync(api, env.alerts_dir, once)
            return

        election_lock = zkutils.make_lock(context.GLOBAL.zk.conn,
                                          z.path.election(__name__))
        _LOGGER.info('Waiting for leader lock.')
        with election_lock:
            _run_sync(api, env.alerts_dir, once)
    def alert_monitor_cmd(approot, plugin):
        """Publish alerts.

        Entries already present in the alerts dir are passed to the
        creation handler before the watcher starts serving.
        """
        env = appenv.AppEnvironment(root=approot)
        dir_watcher = dirwatch.DirWatcher(env.alerts_dir)

        backend = _load_alert_backend(plugin)
        dir_watcher.on_created = _get_on_create_handler(backend)

        # if there are alerts in alerts_dir already
        for pending in os.listdir(env.alerts_dir):
            pending_path = os.path.join(env.alerts_dir, pending)
            dir_watcher.on_created(pending_path)

        _serve_forever(dir_watcher)
Example #23
0
    def run(approot, runtime, container_dir):
        """Runs container given a container dir.

        Any exception raised by the runtime is logged and the app is
        flagged as aborted.
        """
        # Make sure container_dir is a fully resolved path.
        container_dir = os.path.realpath(container_dir)

        _LOGGER.info('run %r %r', approot, container_dir)

        env = appenv.AppEnvironment(approot)
        try:
            container_runtime = app_runtime.get_runtime(
                runtime, env, container_dir
            )
            container_runtime.run()

        except Exception as err:  # pylint: disable=W0703
            _LOGGER.exception('Failed to start, app will be aborted.')
            app_abort.flag_aborted(env, container_dir, err)
Example #24
0
    def cgroup():
        """Runs cgroup node service.

        The root and watchdogs directories are read from ``local_ctx``.
        """
        root = local_ctx['root-dir']
        watchdogs = local_ctx['watchdogs-dir']

        service_dir = os.path.join(root, 'cgroup_svc')
        cgroup_svc = services.ResourceService(
            service_dir=service_dir,
            impl='cgroup',
        )

        # The watchdogs dir is joined onto the root dir.
        cgroup_svc.run(
            watchdogs_dir=os.path.join(root, watchdogs),
            tm_env=appenv.AppEnvironment(root),
        )
Example #25
0
    def cleaning(approot, scan_dir):
        """Setup a monitor for the running containers.

        Uses ``monitor.CleanupMonitorRestartPolicy`` as the restart policy
        and ``monitor.MonitorNodeDown`` with the ``cleanup-`` prefix as the
        down action.
        """
        env = appenv.AppEnvironment(root=approot)

        def _make_policy():
            # The policy needs the app environment, so it is built through
            # a factory that closes over it.
            return monitor.CleanupMonitorRestartPolicy(env)

        node_down = monitor.MonitorNodeDown(env, prefix='cleanup-')
        cleanup_monitor = monitor.Monitor(
            scan_dirs=scan_dir,
            service_dirs=None,
            policy_impl=_make_policy,
            down_action=node_down,
        )
        cleanup_monitor.run()
Example #26
0
    def __init__(self, approot, interval):
        """Set up the cache and start reading.

        With ``interval`` <= 0 a single read is done, otherwise the reader
        enters its loop. Either way this happens from the constructor.
        """
        self._interval = interval
        self.cache = {'treadmill': {}, 'core': {}, 'app': {}}

        self._tm_env = appenv.AppEnvironment(root=approot)
        self._sys_svcs = _sys_svcs(approot)

        # TODO: sys_maj_min will be used changing treadmill.metrics.app_metrics
        major_minor = fs_linux.maj_min_from_path(approot)
        self._sys_maj_min = '{}:{}'.format(*major_minor)

        blk_major_minor = fs_linux.maj_min_from_path(approot)
        self._sys_block_dev = fs_linux.maj_min_to_blk(*blk_major_minor)

        # if interval is zero, we just read one time
        start = self._read if interval <= 0 else self._loop
        start()
Example #27
0
    def setUp(self):
        """Build a temp app root with running apps and archive files."""
        self.root = tempfile.mkdtemp()
        os.environ['TREADMILL_APPROOT'] = self.root
        self.tm_env = appenv.AppEnvironment(root=self.root)

        env = self.tm_env
        fs.mkdir_safe(env.apps_dir)
        fs.mkdir_safe(env.archives_dir)

        # (app name, instance id, unique id) of the fake running apps.
        running_apps = (
            ('proid.simplehttp', '0001025686', 'ymweWiRm86C7A'),
            ('proid.myapi.test', '0001027473', 'kJoV4j0DU6dtJ'),
        )
        for app, instance, uniq in running_apps:
            link_name = '#'.join((app, instance))
            link_path = os.path.join(env.running_dir, link_name)
            fs.mkfile_safe(link_path)

            target_name = '-'.join((app, instance, uniq))
            target_path = os.path.join(env.apps_dir, target_name)
            fs.mkdir_safe(os.path.join(target_path, 'data'))

            fs.symlink_safe(link_path, target_path)

        archive_names = (
            # incorrect file
            'proid.app-foo-bar#123.sys.tar.gz',
            'proid.app#123.sys.tar.gz',
            # correct file
            'proid.app-123-uniq.sys.tar.gz',
            'proid.test.sleep-901-uniq.sys.tar.gz',
        )
        for archive_name in archive_names:
            fs.mkfile_safe(os.path.join(env.archives_dir, archive_name))

        self.api = local.API()
Example #28
0
    def run(approot, runtime, container_dir, runtime_param=None):
        """Runs container given a container dir.

        A ``ContainerSetupError`` aborts the app with the error's own
        reason; any other exception aborts it with
        ``AbortedReason.UNKNOWN``. Both record the traceback as payload.
        """
        # Make sure container_dir is a fully resolved path.
        container_dir = os.path.realpath(container_dir)
        service = supervisor.open_service(container_dir)

        _LOGGER.info('run %r %r', approot, container_dir)

        env = appenv.AppEnvironment(approot)
        runtime_options = utils.equals_list2dict(runtime_param or [])
        try:
            container_runtime = app_runtime.get_runtime(
                runtime, env, service, runtime_options
            )
            container_runtime.run()
        except exc.ContainerSetupError as setup_err:
            _LOGGER.exception('Failed to start, app will be aborted.')
            app_abort.flag_aborted(
                service.data_dir,
                why=setup_err.reason,
                payload=traceback.format_exc(),
            )
        except Exception as err:  # pylint: disable=W0703
            _LOGGER.exception('Failed to start, app will be aborted.')
            app_abort.flag_aborted(
                service.data_dir,
                why=app_abort.AbortedReason.UNKNOWN,
                payload=traceback.format_exc(),
            )
Example #29
0
    def __init__(self, root):
        """Build the app environment for ``root`` and record the hostname.
        """
        _LOGGER.info('init eventmgr: %s', root)

        env = appenv.AppEnvironment(root=root)
        self.tm_env = env

        self._hostname = sysinfo.hostname()
Example #30
0
    def accept_cmd(tkt_spool_dir, approot, port, appname, endpoint, keytab):
        """Run ticket locker acceptor.

        When ``port`` is 0 a free port is picked by binding a temporary
        socket. The TCP endpoint is (re-)registered in Zookeeper with
        ``hostname:port``, a local endpoint spec is written for the current
        process, and finally ``tkt_recv_v2`` is run via
        ``subproc.safe_exec`` with the port and ``tkt_spool_dir``.
        """
        if keytab:
            _construct_keytab(keytab)

        if port == 0:
            # Let the OS pick a free port. The probe socket is always closed,
            # even if bind()/getsockname() fails, so the descriptor is not
            # leaked.
            # NOTE: the port is released here and only handed to tkt_recv_v2
            # at the end, so another process could claim it in between.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.bind(('0.0.0.0', 0))
                port = sock.getsockname()[1]
            finally:
                sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        endpoint_proid_path = z.path.endpoint_proid(appname)
        acl = context.GLOBAL.zk.conn.make_servers_acl()
        _LOGGER.info(
            'Ensuring %s exists with ACL %r',
            endpoint_proid_path,
            acl
        )
        zkutils.ensure_exists(
            context.GLOBAL.zk.conn,
            endpoint_proid_path,
            acl=[acl]
        )

        endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
        _LOGGER.info('Registering %s %s', endpoint_path, hostport)

        # Need to delete/create endpoints for the discovery to pick it up in
        # case of master restart.
        #
        # Unlike typical endpoint, we cannot make the node ephemeral as we
        # exec into tkt-recv.
        zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
        time.sleep(5)
        zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

        context.GLOBAL.zk.conn.stop()

        # TODO: this will publish information about the endpoint state
        #       under /discovery. Once discovery is refactored (if it will be)
        #       we can remove the "manual" zookeeper manipulation.
        tm_env = appenv.AppEnvironment(approot)
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
        endpoints_mgr.unlink_all(
            appname=appname,
            endpoint=endpoint,
            proto='tcp'
        )
        endpoints_mgr.create_spec(
            appname=appname,
            endpoint=endpoint,
            proto='tcp',
            real_port=port,
            pid=os.getpid(),
            port=port,
            owner='/proc/{}'.format(os.getpid()),
        )

        subproc.safe_exec(['tkt_recv_v2',
                           '-p{}'.format(port),
                           '-d{}'.format(tkt_spool_dir)])