def start_container(container_root, manifest):
    """Boot a Treadmill container.

    Reads the application manifest, pivots into ``container_root``, scrubs
    a handful of variables from the process environment and then hands
    control to ``s6_svscan`` scanning ``/services``.

    :param container_root: path handed to ``pivot_root.make_root``.
    :param manifest: manifest reference handed to ``app_manifest.read``.
    :raises Exception: anything raised while pivoting / changing directory
        is re-raised after an aborted trace event has been reported.
    """
    _LOGGER.info('Initializing container: %s', container_root)
    manifest_data = app_manifest.read(manifest)

    try:
        pivot_root.make_root(container_root)
        os.chdir('/')
    except Exception as exc:  # pylint: disable=broad-except
        # Record why the container could not be started ...
        abort_event = traceevents.AbortedTraceEvent(
            instanceid=manifest_data['name'],
            why=app_abort.AbortedReason.PIVOT_ROOT.value,
            payload=str(exc),
        )
        _abort(abort_event, container_root)
        # ... and propagate the failure so start_container exits.
        raise exc

    # XXX: Debug info
    mounts_dump = pprint.pformat(fs_linux.list_mounts())
    _LOGGER.debug('Current mounts: %s', mounts_dump)

    # Clean the environ
    # TODO: Remove me once clean environment management is merged in.
    # The last entry clears the aliases path.
    scrubbed = ('PYTHONPATH', 'LC_ALL', 'LANG', 'TREADMILL_ALIASES_PATH')
    for env_name in scrubbed:
        os.environ.pop(env_name, None)

    svscan_cmd = ['s6_svscan', '-s', '/services']
    subproc.safe_exec(svscan_cmd)
def start_container(ctx, container_root, manifest):
    """Boot a Treadmill container.

    Optionally remounts the cgroup path, pivots into ``container_root``
    and then executes the supervisor init file found under ``/services``.

    :param ctx: context object; ``ctx.obj`` is read for the ``CGROUP`` and
        ``ROOT_CGROUP`` entries.
    :param container_root: path handed to ``pivot_root.make_root``.
    :param manifest: manifest reference handed to ``app_manifest.read``.
    :raises Exception: anything raised while remounting / pivoting is
        re-raised after an aborted trace event has been reported.
    """
    _LOGGER.info('Initializing container: %s', container_root)
    manifest_data = app_manifest.read(manifest)

    cgroup = ctx.obj.get('CGROUP')
    try:
        # if cgroups set, we need to remount cgroup path
        # so that from cgroup directory we only see container pids
        # <container_root>/sys/fs/cgroup/memory =>
        #   /sys/fs/cgroup/memory/treadmill/apps/<app-inst-unique>/services
        if cgroup:
            remount_cgroup(container_root, cgroup, ctx.obj['ROOT_CGROUP'])

        pivot_root.make_root(container_root)
        os.chdir('/')
    except Exception as exc:  # pylint: disable=broad-except
        abort_event = traceevents.AbortedTraceEvent(
            instanceid=manifest_data['name'],
            why=app_abort.AbortedReason.PIVOT_ROOT.value,
            payload=str(exc),
        )
        _abort(abort_event, container_root)
        # Propagate the failure so start_container exits.
        raise exc

    # XXX: Debug info
    mounts_dump = pprint.pformat(fs_linux.list_mounts())
    _LOGGER.debug('Current mounts: %s', mounts_dump)

    init_script = '/services/{}'.format(supervisor.SVC_INIT_FILE)
    subproc.safe_exec([init_script])
def boot(approot, runtime, core_cpu_shares, core_cpuset_cpus,
         apps_cpuset_cpus, core_memory_limit, preserve_mounts):
    """Boot the Treadmill node.

    Initializes the application environment rooted at ``approot``, cleans
    up mounts (keeping the ones listed below), initializes cgroups and
    finally executes ``s6_svscan`` on the environment's init directory.

    :param approot: Treadmill root used to build the ``AppEnvironment``.
    :param runtime: runtime name; only used in the log message here.
    :param core_cpu_shares: forwarded to ``_cgroup_init``.
    :param core_cpuset_cpus: forwarded to ``_cgroup_init``.
    :param apps_cpuset_cpus: forwarded to ``_cgroup_init``.
    :param core_memory_limit: forwarded to ``_cgroup_init``.
    :param preserve_mounts: comma separated mount patterns to keep.
    """
    _LOGGER.info('Initializing Treadmill: %s (%s)', approot, runtime)

    tm_env = appenv.AppEnvironment(approot)
    tm_env.initialize(None)

    # We preserve anything mounted on the install root (mounted by
    # plugins?) and whatever path provided on the commandline.
    install_root_glob = tm_env.root + '*'
    requested = preserve_mounts.split(',')
    fs_linux.cleanup_mounts([install_root_glob] + requested)

    _cgroup_init(
        core_cpu_shares,
        core_cpuset_cpus,
        apps_cpuset_cpus,
        core_memory_limit
    )

    svscan_cmd = ['s6_svscan', '-s', tm_env.init_dir]
    subproc.safe_exec(svscan_cmd)
def start_container(container_root):
    """Treadmill container boot process.

    Cleans up mounts outside of ``container_root``, mounts procfs for both
    the container root and ``/``, chroots into the container, scrubs a few
    variables from the environment and executes ``s6_svscan`` on
    ``/services``.

    :param container_root: path of the container root to chroot into.
    """
    _LOGGER.info('Initializing container: %s', container_root)

    # We only preserve anything mounted on the container root.
    fs_linux.cleanup_mounts([container_root + '*'], ignore_exc=True)

    # Mount a proc in the new namespaces
    fs_linux.mount_procfs(container_root)

    # remount current proc to /proc
    # new process entering container namespace can see the correct /proc
    fs_linux.mount_procfs('/')

    # Chroot into the container
    os.chroot(container_root)
    os.chdir('/')

    # XXX: Debug info
    _LOGGER.debug('Current mounts: %s',
                  pprint.pformat(fs_linux.list_mounts()))

    # Clean the environ
    # TODO: Remove me once clean environment management is merged in.
    # pop() with a default instead of `del`: any of these variables may be
    # unset, and a KeyError here would abort the container boot.
    for env_name in ('PYTHONPATH', 'LC_ALL', 'LANG'):
        os.environ.pop(env_name, None)

    subproc.safe_exec(['s6_svscan', '-s', '/services'])
def version_monitor():
    """Run the node version monitor.

    Waits until the version nodes exist in Zookeeper, collects the
    versions of the installed Treadmill distributions (plus whatever the
    ``treadmill.version_monitor`` plugins contribute), saves the result
    for this host and then executes ``sleep inf``.
    """
    _LOGGER.info('Initializing version monitor')

    zkclient = context.GLOBAL.zk.conn
    zkclient.add_listener(zkutils.exit_on_lost)

    # Poll until both the version and the version history nodes exist.
    while not (zkclient.exists(z.VERSION) and
               zkclient.exists(z.VERSION_HISTORY)):
        _LOGGER.warning('namespace not ready.')
        time.sleep(30)

    hostname = sysinfo.hostname()

    # Only distributions carrying the Treadmill prefix are reported.
    distributions = {}
    for dist in pkg_resources.working_set:
        if dist.project_name.startswith(_TREADMILL_DIST_PREFIX):
            distributions[dist.project_name] = dist.version

    version = {
        'distributions': distributions,
        'since': int(time.time()),
    }

    # Each plugin is called without arguments and its result merged in.
    group = 'treadmill.version_monitor'
    for plugin_name in plugin_manager.names(group):
        collect = plugin_manager.load(group, plugin_name)
        version.update(collect())

    _save_version(zkclient, hostname, version)

    subproc.safe_exec(['sleep', 'inf'])
def haproxy(fs_root, config):
    """Run Treadmill HAProxy.

    :param fs_root: root directory handed to ``zksync_utils.wait_for_ready``.
    :param config: path of the HAProxy configuration file (``-f``).
    """
    # keep sleeping until zksync ready
    zksync_utils.wait_for_ready(fs_root)

    # TODO: implement config creation by iterating over fs-root/app-groups.
    #       We would get all app-groups, then add a frontend and backend for
    #       each lbendpoint for this cell's region. Of course this would be
    #       abstracted into treadmill.haproxy.
    haproxy_cmd = ['haproxy', '-f', config]
    subproc.safe_exec(haproxy_cmd)
def haproxy(fs_root, config):
    """Run Treadmill HAProxy.

    Blocks until the ``.modified`` marker exists under ``fs_root`` and
    then executes ``haproxy`` with the given configuration file.

    :param fs_root: directory expected to contain the ``.modified`` marker.
    :param config: path of the HAProxy configuration file (``-f``).
    """
    marker = os.path.join(fs_root, '.modified')
    # Poll once a second until the marker file shows up.
    while True:
        if os.path.exists(marker):
            break
        _LOGGER.info('zk2fs mirror does not exist, waiting.')
        time.sleep(1)

    # TODO: implement config creation by iterating over fs-root/app-groups.
    #       We would get all app-groups, then add a frontend and backend for
    #       each lbendpoint for this cell's region. Of course this would be
    #       abstracted into treadmill.haproxy.
    haproxy_cmd = ['haproxy', '-f', config]
    subproc.safe_exec(haproxy_cmd)
def accept(tkt_spool_dir, port, appname, endpoint, use_v2):
    """Run ticket locker acceptor.

    Registers ``<hostname>:<port>`` as the ``tcp`` endpoint of ``appname``
    in Zookeeper, stops the Zookeeper connection and executes the ticket
    receiver.

    :param tkt_spool_dir: tickets spool directory passed to the receiver.
    :param port: port to listen on; ``0`` means pick a free port.
    :param appname: application name the endpoint is registered under.
    :param endpoint: endpoint name.
    :param use_v2: when true run ``tkt_recv_v2``, otherwise ``tkt_recv``.
    """
    if port == 0:
        # Let the OS pick a free port by binding a throw-away socket.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
        finally:
            # Always release the descriptor, even if bind() failed.
            sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    endpoint_proid_path = z.path.endpoint_proid(appname)
    _LOGGER.info('Ensuring %s exists with ACL %r',
                 endpoint_proid_path, _SERVERS_ACL)
    zkutils.ensure_exists(context.GLOBAL.zk.conn, endpoint_proid_path,
                          acl=[_SERVERS_ACL])

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    #
    # Unlike typical endpoint, we cannot make the node ephemeral as we
    # exec into tkt-recv.
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
    time.sleep(5)
    zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

    context.GLOBAL.zk.conn.stop()

    # Exec into tickets acceptor. If race condition will not allow it to
    # bind to the provided port, it will exit and registration will
    # happen again.
    if use_v2:
        subproc.safe_exec([
            'tkt_recv_v2',
            '-p{}'.format(port),
            '-d{}'.format(tkt_spool_dir)
        ])
    else:
        subproc.safe_exec(
            ['tkt_recv', 'tcp://*:{}'.format(port), tkt_spool_dir]
        )
def accept_cmd(tkt_spool_dir, approot, port, appname, endpoint, keytab):
    """Run ticket locker acceptor.

    Registers ``<hostname>:<port>`` as the ``tcp`` endpoint of ``appname``
    in Zookeeper, publishes a matching endpoint spec in the local
    endpoints directory and executes ``tkt_recv_v2``.

    :param tkt_spool_dir: tickets spool directory passed to the receiver.
    :param approot: Treadmill root used to build the ``AppEnvironment``.
    :param port: port to listen on; ``0`` means pick a free port.
    :param appname: application name the endpoint is registered under.
    :param endpoint: endpoint name.
    :param keytab: if truthy, handed to ``_construct_keytab`` first.
    """
    if keytab:
        _construct_keytab(keytab)

    if port == 0:
        # Let the OS pick a free port by binding a throw-away socket.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
        finally:
            # Always release the descriptor, even if bind() failed.
            sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    endpoint_proid_path = z.path.endpoint_proid(appname)
    acl = context.GLOBAL.zk.conn.make_servers_acl()
    _LOGGER.info(
        'Ensuring %s exists with ACL %r',
        endpoint_proid_path,
        acl
    )
    zkutils.ensure_exists(
        context.GLOBAL.zk.conn,
        endpoint_proid_path,
        acl=[acl]
    )

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    #
    # Unlike typical endpoint, we cannot make the node ephemeral as we
    # exec into tkt-recv.
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
    time.sleep(5)
    zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

    context.GLOBAL.zk.conn.stop()

    # TODO: this will publish information about the endpoint state
    #       under /discovery. Once discovery is refactored (if it will be)
    #       we can remove the "manual" zookeeper manipulation.
    tm_env = appenv.AppEnvironment(approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    # Drop any previously published spec for this endpoint before
    # creating the new one.
    endpoints_mgr.unlink_all(
        appname=appname,
        endpoint=endpoint,
        proto='tcp'
    )
    endpoints_mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )

    subproc.safe_exec(['tkt_recv_v2',
                       '-p{}'.format(port),
                       '-d{}'.format(tkt_spool_dir)])
def exec_fstrace(path):
    """Execute ``treadmill-fstrace`` on the given path.

    :param path: path passed as the only argument to ``treadmill-fstrace``.
    """
    _LOGGER.debug('watch path %r', path)
    fstrace_cmd = ['treadmill-fstrace', path]
    subproc.safe_exec(fstrace_cmd)
def accept(tkt_spool_dir, approot, port, appname, endpoint, use_v2, keytab):
    """Run ticket locker acceptor.

    Registers ``<hostname>:<port>`` as the ``tcp`` endpoint of ``appname``
    in Zookeeper, publishes a matching endpoint spec in the local
    endpoints directory and executes the ticket receiver.

    :param tkt_spool_dir: tickets spool directory passed to the receiver.
    :param approot: Treadmill root used to build the ``AppEnvironment``.
    :param port: port to listen on; ``0`` means pick a free port.
    :param appname: application name the endpoint is registered under.
    :param endpoint: endpoint name.
    :param use_v2: when true run ``tkt_recv_v2``, otherwise ``tkt_recv``.
    :param keytab: if truthy, handed to ``_construct_keytab`` first.
    """
    if keytab:
        _construct_keytab(keytab)

    if port == 0:
        # Let the OS pick a free port by binding a throw-away socket.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
        finally:
            # Always release the descriptor, even if bind() failed.
            sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    endpoint_proid_path = z.path.endpoint_proid(appname)
    acl = context.GLOBAL.zk.conn.make_servers_acl()
    _LOGGER.info(
        'Ensuring %s exists with ACL %r',
        endpoint_proid_path,
        acl
    )
    zkutils.ensure_exists(
        context.GLOBAL.zk.conn,
        endpoint_proid_path,
        acl=[acl]
    )

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    #
    # Unlike typical endpoint, we cannot make the node ephemeral as we
    # exec into tkt-recv.
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
    time.sleep(5)
    zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

    context.GLOBAL.zk.conn.stop()

    tm_env = appenv.AppEnvironment(approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    # Drop any previously published spec for this endpoint before
    # creating the new one.
    endpoints_mgr.unlink_all(
        appname=appname,
        endpoint=endpoint,
        proto='tcp'
    )
    endpoints_mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )

    # Exec into tickets acceptor. If race condition will not allow it to
    # bind to the provided port, it will exit and registration will
    # happen again.
    if use_v2:
        subproc.safe_exec(['tkt_recv_v2',
                           '-p{}'.format(port),
                           '-d{}'.format(tkt_spool_dir)])
    else:
        subproc.safe_exec(['tkt_recv',
                           'tcp://*:{}'.format(port),
                           tkt_spool_dir])