def run(self):
    """Prepare the container environment and hand off to the runtime.

    Reads the application manifest from the container directory, checks
    that this runtime accepts it, installs a termination-signal flag,
    registers a run watchdog and finally delegates to ``self._run``.

    The function is intended to be invoked from 'run' script and never
    returns.

    :raises exc.ContainerSetupError:
        When ``self._can_run`` rejects the manifest.
    :returns:
        This function never returns
    """
    manifest = app_manifest.read(
        os.path.join(self.container_dir, appcfg.APP_JSON)
    )

    if not self._can_run(manifest):
        runtime_name = self.__class__.__name__
        app_type = manifest.get('type')
        raise exc.ContainerSetupError(
            'Runtime {0} does not support {1}.'.format(runtime_name, app_type)
        )

    # Intercept SIGTERM from supervisor, so that initialization is not
    # left in broken state.
    terminated = utils.make_signal_flag(utils.term_signal())

    # The watchdog is named after the unique instance name of the app.
    watchdog_name = 'app_run-%s' % appcfg.manifest_unique_name(manifest)
    stall_message = 'Run of {container_dir!r} stalled'.format(
        container_dir=self.container_dir
    )
    self.watchdog = self.tm_env.watchdogs.create(
        watchdog_name,
        self.run_timeout(manifest),
        stall_message,
    )

    self._run(manifest, self.watchdog, terminated)
def configure(self, manifest):
    """Append the ``krb5keytab`` system service to the manifest.

    Builds a service definition whose command runs ``treadmill admin
    krb5keytab`` for the manifest's proid and then execs ``sleep inf``,
    and appends it to ``manifest['system_services']`` in place.

    :param manifest:
        Application manifest; ``manifest['proid']`` is read and
        ``manifest['system_services']`` is mutated.
    """
    _LOGGER.info('Configuring krb5keytab.')

    # Root of the container as seen from the host.
    container_root = os.path.join(
        self._tm_env.apps_dir,
        appcfg.manifest_unique_name(manifest),
        'data',
        'root',
    )

    command_template = (
        '{treadmill}/bin/treadmill --debug admin krb5keytab'
        ' --owner {user}'
        ' --principal {user}/{hostname}'
        ' --keytab {appdir}/var/spool/keytabs/{user}'
        ' --cachedir /tmp'
        ' --lockdir /tmp'
        '; exec sleep inf'
    )
    command = command_template.format(
        treadmill=subproc.resolve('treadmill'),
        user=manifest['proid'],
        hostname=sysinfo.hostname(),
        appdir=container_root,
    )

    # The service talks to Kerberos using the host ticket cache.
    ticket_env = {
        'name': 'KRB5CCNAME',
        'value': os.path.expandvars('FILE:${TREADMILL_HOST_TICKET}'),
    }

    manifest['system_services'].append({
        'name': 'krb5keytab',
        'restart': {'limit': 5, 'interval': 60},
        'proid': 'root',
        'root': True,
        'command': command,
        'environ': [ticket_env],
        'config': None,
        'downed': False,
        'trace': False,
        'logger': 's6.logger.run',
    })
def run(tm_env, container_dir, manifest):
    """Create the container environment and exec the root supervisor.

    Submits cgroup / localdisk / network resource requests, waits for them,
    applies the cgroup limits, records the network details in the manifest,
    allocates the network ports, saves the app state, optionally unshares
    the network, creates the root directory, unpacks the image, runs the
    app hooks and finally calls ``subproc.exec_pid1`` on ``s6_svscan``.

    :param tm_env:
        Treadmill application environment (provides the ``svc_*`` services).
    :param container_dir:
        Full path to the container directory.
    :param manifest:
        Application manifest (``dict``); updated in place with ``network``
        and ``vip`` entries.
    """
    _LOGGER.info('Running %r', container_dir)

    unique_name = appcfg.manifest_unique_name(manifest)

    # Generate resources requests
    resources_dir = os.path.join(container_dir, 'resources')
    fs.mkdir_safe(resources_dir)

    cgroups = tm_env.svc_cgroup.make_client(
        os.path.join(resources_dir, 'cgroups')
    )
    localdisks = tm_env.svc_localdisk.make_client(
        os.path.join(resources_dir, 'localdisk')
    )
    networks = tm_env.svc_network.make_client(
        os.path.join(resources_dir, 'network')
    )

    # Build every request before submitting any of them.
    cgroup_request = {'memory': manifest['memory'], 'cpu': manifest['cpu']}
    localdisk_request = {'size': manifest['disk']}
    network_request = {'environment': manifest['environment']}

    cgroups.put(unique_name, cgroup_request)
    localdisks.put(unique_name, localdisk_request)
    if not manifest['shared_network']:
        networks.put(unique_name, network_request)

    # Apply memory limits first thing, so that app_run does not consume memory
    # from treadmill/core.
    _apply_cgroup_limits(cgroups.wait(unique_name))

    localdisk = localdisks.wait(unique_name)
    # NOTE(review): the network request above is only submitted for
    # non-shared networks, yet this wait is unconditional -- confirm that
    # wait() handles the shared-network case.
    network_info = networks.wait(unique_name)

    img_impl = image.get_image(tm_env, manifest)

    manifest['network'] = network_info
    # FIXME: backward compatibility for TM 2.0. Remove in 3.0
    manifest['vip'] = {
        'ip0': network_info['gateway'],
        'ip1': network_info['vip'],
    }

    # Allocate dynamic ports
    #
    # Ports are taken from ephemeral range, by binding to socket to port 0.
    #
    # Sockets are then put into global list, so that they are not closed
    # at gc time, and address remains in use for the lifetime of the
    # supervisor.
    port_sockets = runtime.allocate_network_ports(
        network_info['external_ip'], manifest
    )

    app = runtime.save_app(manifest, container_dir)

    if not app.shared_network:
        _unshare_network(tm_env, container_dir, app)

    # Create and format the container root volume.
    root_dir = _create_root_dir(container_dir, localdisk)

    # NOTE: below here, MOUNT namespace is private

    # Unpack the image to the root directory.
    img_impl.unpack(container_dir, root_dir, app)

    # If network is shared, close sockets before starting the
    # supervisor, as these ports will be used by container apps.
    if app.shared_network:
        for port_socket in port_sockets:
            port_socket.close()

    # hook container
    apphook.configure(tm_env, app, container_dir)

    supervisor_cmd = ['s6_svscan', '-s', os.path.join(container_dir, 'sys')]
    subproc.exec_pid1(
        supervisor_cmd,
        propagation='slave',
        # We need to keep our mapped ports open
        close_fds=False
    )
def add_linux_system_services(tm_env, manifest):
    """Configure linux system services.

    Appends, in order, one ``vring.<cell>`` service per vring cell (only
    when the manifest has a ``vring`` section), then the ``register``,
    ``hostaliases`` and ``start_container`` services to
    ``manifest['system_services']``.

    :param tm_env:
        Treadmill application environment (``apps_dir`` and ``root`` are
        read).
    :param manifest:
        Application manifest (``dict``); mutated in place.
    """
    unique_name = appcfg.manifest_unique_name(manifest)
    container_svcdir = supervisor.open_service(
        os.path.join(tm_env.apps_dir, unique_name),
        existing=False
    )
    container_data_dir = container_svcdir.data_dir
    state_file = os.path.join(container_data_dir, 'state.json')
    system_services = manifest['system_services']

    def _restart(limit):
        """Fresh restart policy with the given limit."""
        return {'limit': limit, 'interval': 60}

    def _host_ticket():
        """Fresh environ entry pointing at the host ticket cache."""
        return {
            'name': 'KRB5CCNAME',
            'value': os.path.expandvars('FILE:${TREADMILL_HOST_TICKET}'),
        }

    if 'vring' in manifest:
        # Add the Vring daemon services
        for cell in manifest['vring']['cells']:
            vring_command = (
                'exec {treadmill}/bin/treadmill sproc'
                ' --zookeeper {zkurl}'
                ' --cell {cell}'
                ' vring'
                ' --approot {tm_root}'
                ' {manifest}'
            ).format(
                treadmill=subproc.resolve('treadmill'),
                zkurl=manifest['zookeeper'],
                cell=cell,
                tm_root=tm_env.root,
                manifest=state_file,
            )
            system_services.append({
                'name': 'vring.%s' % cell,
                'proid': 'root',
                'restart': _restart(5),
                'command': vring_command,
                'environ': [_host_ticket()],
                'config': None,
                'downed': False,
                'trace': False,
            })

    # Create ticket refresh and container/endpoint presence service
    register_command = (
        'exec {treadmill}/bin/treadmill sproc'
        ' --zookeeper {zkurl}'
        ' --cell {cell}'
        ' presence register'
        ' {manifest} {container_dir}'
    ).format(
        treadmill=subproc.resolve('treadmill'),
        zkurl=manifest['zookeeper'],
        cell=manifest['cell'],
        manifest=state_file,
        container_dir=container_data_dir,
    )
    system_services.append({
        'name': 'register',
        'proid': 'root',
        'restart': _restart(5),
        'command': register_command,
        'environ': [
            _host_ticket(),
            {
                'name': 'TREADMILL_ALIASES_PATH',
                'value': os.getenv('TREADMILL_ALIASES_PATH'),
            },
        ],
        'config': None,
        'downed': False,
        'trace': False,
    })

    # Create container /etc/hosts manager service
    overlay_dir = os.path.join(container_data_dir, 'overlay')
    hostaliases_command = (
        'exec {treadmill}/bin/treadmill sproc'
        ' --cell {cell}'
        ' host-aliases'
        ' --aliases-dir {aliases_dir}'
        ' {hosts_original} {hosts_container}'
    ).format(
        treadmill=subproc.resolve('treadmill'),
        cell=manifest['cell'],
        aliases_dir=os.path.join(overlay_dir, 'run', 'host-aliases'),
        hosts_original=os.path.join('/', 'etc', 'hosts'),
        hosts_container=os.path.join(overlay_dir, 'etc', 'hosts'),
    )
    # NOTE(review): unlike the other services this one carries no 'config'
    # key -- confirm that is intended.
    system_services.append({
        'name': 'hostaliases',
        'proid': 'root',
        'restart': _restart(5),
        'command': hostaliases_command,
        'environ': [],
        'downed': False,
        'trace': False,
    })

    # Create the user app top level supervisor
    #
    # Reset environment variables set by treadmill to default values.
    start_command = (
        'exec'
        ' {pid1} -i -m -p'
        ' --propagation slave'
        ' {treadmill}/bin/treadmill sproc'
        ' --cgroup /apps/{unique_name}/services'
        ' --cell {cell}'
        ' start-container'
        ' --container-root {container_dir}/root'
        ' {manifest}'
    ).format(
        treadmill=subproc.resolve('treadmill'),
        unique_name=unique_name,
        cell=manifest['cell'],
        pid1=subproc.resolve('pid1'),
        container_dir=container_data_dir,
        manifest=state_file,
    )
    # This service is created in the "downed" state and has a restart
    # limit of zero.
    system_services.append({
        'name': 'start_container',
        'proid': 'root',
        'restart': _restart(0),
        'command': start_command,
        'environ': [],
        'config': None,
        'downed': True,
        'trace': False,
    })
def vring_cmd(approot, manifest):
    """Run vring manager.

    Loads the application manifest, validates its ``vring`` section, builds
    the endpoint routing table, synchronises discovery for the single
    supported rule pattern, resets signal dispositions and then hands
    control to ``vring.run``.

    The process exits with status -1 (after a critical log message) when
    the current cell is not listed in the vring, when a rule references an
    endpoint missing from the manifest, or when more than one rule is
    configured.

    :param approot:
        Value handed to ``appenv.AppEnvironment``.
    :param manifest:
        Stream the YAML application manifest is read from.
    """
    context.GLOBAL.zk.conn.add_listener(zkutils.exit_on_disconnect)
    tm_env = appenv.AppEnvironment(approot)

    # NOTE(review): yaml.load() without an explicit Loader can construct
    # arbitrary Python objects and is rejected by recent PyYAML releases.
    # Switching to yaml.safe_load() should be considered once it is
    # confirmed that the state file never contains python-specific tags.
    app = yaml.load(stream=manifest)

    with lc.LogContext(_LOGGER, app['name'], lc.ContainerAdapter) as log:

        # TODO(boysson): Remove all validation from here.
        utils.validate(app, [('vring', True, dict)])
        ring = app['vring']
        utils.validate(ring, [('rules', True, list), ('cells', True, list)])

        if context.GLOBAL.cell not in ring['cells']:
            log.critical('cell %s not listed in vring.', context.GLOBAL.cell)
            sys.exit(-1)

        rules = ring['rules']
        for rule in rules:
            utils.validate(rule, [('pattern', True, str),
                                  ('endpoints', True, list)])

        # Create translation for endpoint name to expected port #.
        routing = {}
        for endpoint in app.get('endpoints', []):
            routing[endpoint['name']] = {
                'port': endpoint['port'],
                'proto': endpoint['proto']
            }

        # Check that all ring endpoints are listed in the manifest.
        vring_endpoints = set()
        for rule in rules:
            for rule_endpoint in rule['endpoints']:
                if rule_endpoint not in routing:
                    log.critical(
                        'vring references non-existing endpoint: [%s]',
                        rule_endpoint)
                    sys.exit(-1)
                vring_endpoints.add(rule_endpoint)

        # TODO: discovery is limited to one rule for now.
        if len(rules) != 1:
            log.critical('(TODO): multiple rules are not supported.')
            sys.exit(-1)
        pattern = rules[0]['pattern']

        app_unique_name = appcfg.manifest_unique_name(app)

        app_discovery = discovery.Discovery(context.GLOBAL.zk.conn,
                                            pattern, '*')
        app_discovery.sync()

        # Restore default signal mask disabled by python spawning new
        # thread for Zk connection.
        #
        # TODO: should this be done as part of ZK connect?
        for sig in range(1, signal.NSIG):
            try:
                signal.signal(sig, signal.SIG_DFL)
            except (RuntimeError, OSError, ValueError):
                # Some signals (e.g. SIGKILL / SIGSTOP) cannot have their
                # disposition changed.  Python 2 reports that as
                # RuntimeError, Python 3 as OSError (or ValueError for an
                # invalid signal number); this is a best-effort reset, so
                # skip them instead of aborting the loop.
                pass

        vring.run(
            routing,
            vring_endpoints,
            app_discovery,
            tm_env.rules,
            app['network']['vip'],
            app_unique_name,
        )
def run(tm_env, container_dir, manifest, watchdog, terminated):
    """Creates container environment and prepares to exec root supervisor.

    The function is intended to be invoked from 'run' script and never
    returns.

    In order, it: allocates the dynamic ports, waits for the network
    device, records the network details in the manifest, saves the manifest
    as the container state file, optionally unshares the network, creates
    the container root, copies the state into it, creates the environ
    directory and supervision tree, sets up the ld.so.preload and /etc
    bind mounts, removes the watchdog and, unless termination was
    requested, execs the root supervisor.

    :param tm_env:
        Treadmill application environment
    :type tm_env:
        `appenv.AppEnvironment`
    :param container_dir:
        Full path to the container
    :type container_dir:
        ``str``
    :param manifest:
        App manifest. Updated in place with ``network`` and ``vip``.
    :type manifest:
        ``dict``
    :param watchdog:
        App run watchdog.
    :type watchdog:
        ``treadmill.watchdog``
    :param terminated:
        Flag where terminated signal will accumulate.
    :type terminated:
        ``set``
    :returns:
        This function never returns
    """

    with lc.LogContext(_LOGGER, os.path.basename(container_dir),
                       lc.ContainerAdapter) as log:
        # R0915: Need to refactor long function into smaller pieces.
        # R0912: Too many branches
        #
        # pylint: disable=R0915,R0912
        log.logger.info('Running %r', container_dir)

        # Allocate dynamic ports
        #
        # Ports are taken from ephemeral range, by binding to socket to port 0.
        #
        # Sockets are then put into global list, so that they are not closed
        # at gc time, and address remains in use for the lifetime of the
        # supervisor.
        sockets = _allocate_network_ports(tm_env.host_ip, manifest)

        unique_name = appcfg.manifest_unique_name(manifest)

        # First wait for the network device to be ready
        network_client = tm_env.svc_network.make_client(
            os.path.join(container_dir, 'network'))
        app_network = network_client.wait(unique_name)

        manifest['network'] = app_network
        # FIXME(boysson): backward compatibility for TM 2.0. Remove in 3.0
        manifest['vip'] = {
            'ip0': app_network['gateway'],
            'ip1': app_network['vip'],
        }

        # Save the manifest with allocated vip and ports in the state.
        # The data is written to a temporary file in the same directory and
        # then renamed over the final name.
        state_file = os.path.join(container_dir, _STATE_YML)
        with tempfile.NamedTemporaryFile(dir=container_dir,
                                         delete=False, mode='w') as temp_file:
            yaml.dump(manifest, stream=temp_file)
            # chmod for the file to be world readable.
            os.fchmod(
                temp_file.fileno(),
                stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
        os.rename(temp_file.name, state_file)

        # Freeze the app data into a namedtuple object
        app = utils.to_obj(manifest)

        if not app.shared_network:
            _unshare_network(tm_env, app)

        # Create root directory structure (chroot base).
        # container_dir/<subdir>
        root_dir = os.path.join(container_dir, 'root')

        # chroot_dir/<subdir>
        # FIXME(boysson): env_dir should be in a well defined location (part
        #                 of the container "API").
        env_dir = os.path.join(root_dir, 'environ')

        # Create and format the container root volume
        _create_root_dir(tm_env, container_dir, root_dir, app)

        # NOTE: below here, MOUNT namespace is private

        # FIXME(boysson): Lots of things are still reading this file.
        #                 Copy updated state manifest as app.yml in the
        #                 container_dir so it is visible in chrooted env.
        shutil.copy(state_file, os.path.join(root_dir, _APP_YML))
        _create_environ_dir(env_dir, app)
        # Create the supervision tree
        _create_supervision_tree(container_dir, app)

        # Set app limits before chroot.
        _share_cgroup_info(app, root_dir)

        # The bind preload library is only needed when the app requested
        # ephemeral TCP or UDP ports.
        ldpreloads = []
        if app.ephemeral_ports.tcp or app.ephemeral_ports.udp:
            treadmill_bind_preload = subproc.resolve(
                'treadmill_bind_preload.so')
            ldpreloads.append(treadmill_bind_preload)

        _prepare_ldpreload(root_dir, ldpreloads)

        def _bind(src, tgt):
            """Helper function to bind source to target in the same root"""
            # FIXME(boysson): This name mount_bind() have counter-intuitive
            #                 arguments ordering.
            src_path = os.path.join(root_dir, src)
            # Sources that do not exist are silently skipped.
            if os.path.exists(src_path):
                fs.mount_bind(root_dir, tgt, target=src_path, bind_opt='--bind')

        # Override the /etc/resolv.conf, so that container always uses
        # dnscache.
        _bind('.etc/resolv.conf', '/etc/resolv.conf')
        _bind('.etc/hosts', '/etc/hosts')

        if ldpreloads:
            # Override /etc/ld.so.preload to enforce necessary system hooks
            _bind('.etc/ld.so.preload', '/etc/ld.so.preload')

        # If network is shared, close ephemeral sockets before starting the
        # supervisor, as these ports will be used by container apps.
        if app.shared_network:
            for socket_ in sockets:
                socket_.close()

            # Override pam.d sshd stack with special sshd pam that unshares
            # network.
            _bind('.etc/pam.d/sshd.shared_network', '/etc/pam.d/sshd')
        # else:
        #     # Override pam.d sshd stack.
        #     _bind('.etc/pam.d/sshd', '/etc/pam.d/sshd')

        # Setup is complete: drop the run watchdog before handing off.
        watchdog.remove()

        # Only start the supervisor if no termination was requested while
        # the container was being set up.
        if not terminated:
            sys_dir = os.path.join(container_dir, 'sys')
            supervisor.exec_root_supervisor(sys_dir)
def vring_cmd(approot, manifest):
    """Run vring manager.

    Reads the JSON application manifest, validates its ``vring`` section,
    builds the endpoint routing table, synchronises discovery for all rule
    patterns, restores signals and then hands control to ``vring.run``.

    The process exits with status -1 (after a critical log message) when
    the current cell is not listed in the vring or when a rule references
    an endpoint missing from the manifest.

    :param approot:
        Value handed to ``appenv.AppEnvironment``.
    :param manifest:
        Path of the JSON application manifest.
    """
    context.GLOBAL.zk.conn.add_listener(zkutils.exit_on_disconnect)
    tm_env = appenv.AppEnvironment(approot)

    with io.open(manifest, 'r') as manifest_file:
        app = json.load(manifest_file)

    with lc.LogContext(_LOGGER, app['name'], lc.ContainerAdapter) as log:

        # TODO(boysson): Remove all validation from here.
        utils.validate(app, [('vring', True, dict)])
        ring = app['vring']
        utils.validate(ring, [('rules', True, list), ('cells', True, list)])

        if context.GLOBAL.cell not in ring['cells']:
            log.critical('cell %s not listed in vring.', context.GLOBAL.cell)
            sys.exit(-1)

        rules = ring['rules']
        rule_schema = [('pattern', True, str), ('endpoints', True, list)]
        for rule in rules:
            utils.validate(rule, rule_schema)

        # Create translation for endpoint name to expected port #.
        routing = {
            endpoint['name']: {
                'port': endpoint['port'],
                'proto': endpoint['proto'],
            }
            for endpoint in app.get('endpoints', [])
        }

        # Check that all ring endpoints are listed in the manifest.
        vring_endpoints = set()
        for rule in rules:
            for endpoint_name in rule['endpoints']:
                if endpoint_name in routing:
                    vring_endpoints.add(endpoint_name)
                    continue
                log.critical(
                    'vring references non-existing endpoint: [%s]',
                    endpoint_name)
                sys.exit(-1)

        patterns = []
        for rule in rules:
            patterns.append(rule['pattern'])

        app_discovery = discovery.Discovery(
            context.GLOBAL.zk.conn, patterns, '*'
        )
        app_discovery.sync()

        # Restore default signal mask disabled by python spawning new
        # thread for Zk connection.
        #
        # TODO: should this be done as part of ZK connect?
        utils.restore_signals()

        app_unique_name = appcfg.manifest_unique_name(app)

        vring.run(
            routing,
            vring_endpoints,
            app_discovery,
            tm_env.rules,
            app['network']['vip'],
            app_unique_name,
        )
def _add_linux_system_services(tm_env, manifest):
    """Configure linux system services.

    Appends, in order, one ``vring.<cell>`` service per vring cell (only
    when the manifest has a ``vring`` section), then the ``register``,
    ``hostaliases``, ``start_container`` and ``monitor`` services to
    ``manifest['system_services']``.

    :param tm_env:
        Treadmill application environment (``apps_dir`` and ``root`` are
        read).
    :param manifest:
        Application manifest (``dict``); mutated in place.
    """
    service_root = os.path.join(
        tm_env.apps_dir, appcfg.manifest_unique_name(manifest)
    )
    container_svcdir = supervisor.open_service(service_root, existing=False)
    container_data_dir = container_svcdir.data_dir
    state_file = os.path.join(container_data_dir, 'state.json')
    system_services = manifest['system_services']

    def _restart(limit):
        """Fresh restart policy with the given limit."""
        return {'limit': limit, 'interval': 60}

    def _host_ticket():
        """Fresh environ entry pointing at the host ticket cache."""
        return {
            'name': 'KRB5CCNAME',
            'value': os.path.expandvars('FILE:${TREADMILL_HOST_TICKET}'),
        }

    def _aliases_path():
        """Fresh environ entry forwarding TREADMILL_ALIASES_PATH."""
        return {
            'name': 'TREADMILL_ALIASES_PATH',
            'value': os.getenv('TREADMILL_ALIASES_PATH'),
        }

    if 'vring' in manifest:
        # Add the Vring daemon services
        for cell in manifest['vring']['cells']:
            vring_command = (
                'exec {tm} sproc'
                ' --zookeeper {zkurl}'
                ' --cell {cell}'
                ' vring'
                ' --approot {tm_root}'
                ' {manifest}'
            ).format(
                tm=dist.TREADMILL_BIN,
                zkurl=manifest['zookeeper'],
                cell=cell,
                tm_root=tm_env.root,
                manifest=state_file,
            )
            system_services.append({
                'name': 'vring.%s' % cell,
                'proid': 'root',
                'restart': _restart(5),
                'command': vring_command,
                'environ': [_host_ticket()],
                'config': None,
                'downed': False,
                'trace': False,
            })

    # Create ticket refresh and container/endpoint presence service
    register_command = (
        'exec {tm} sproc'
        ' --zookeeper {zkurl}'
        ' --cell {cell}'
        ' presence register'
        ' --approot {tm_root}'
        ' {manifest} {container_dir}'
    ).format(
        tm=dist.TREADMILL_BIN,
        zkurl=manifest['zookeeper'],
        cell=manifest['cell'],
        tm_root=tm_env.root,
        manifest=state_file,
        container_dir=container_data_dir,
    )
    system_services.append({
        'name': 'register',
        'proid': 'root',
        'restart': _restart(5),
        'command': register_command,
        'environ': [_host_ticket(), _aliases_path()],
        'config': None,
        'downed': False,
        'trace': False,
    })

    # Create container /etc/hosts manager service
    overlay_dir = os.path.join(container_data_dir, 'overlay')
    hostaliases_command = (
        'exec {tm} sproc'
        ' --cell {cell}'
        ' host-aliases'
        ' --aliases-dir {aliases_dir}'
        ' {hosts_original} {hosts_container}'
    ).format(
        tm=dist.TREADMILL_BIN,
        cell=manifest['cell'],
        aliases_dir=os.path.join(overlay_dir, 'run', 'host-aliases'),
        hosts_original=os.path.join('/', 'etc', 'hosts'),
        hosts_container=os.path.join(overlay_dir, 'etc', 'hosts'),
    )
    # NOTE(review): unlike the other services this one carries no 'config'
    # key -- confirm that is intended.
    system_services.append({
        'name': 'hostaliases',
        'proid': 'root',
        'restart': _restart(5),
        'command': hostaliases_command,
        'environ': [],
        'downed': False,
        'trace': False,
    })

    # Create the user app top level supervisor
    start_command = (
        'exec {chroot} {container_dir}/root'
        ' {pid1} -m -p -i'
        ' {svscan} -s /services'
    ).format(
        chroot=subproc.resolve('chroot'),
        container_dir=container_data_dir,
        pid1=subproc.resolve('pid1'),
        svscan=subproc.resolve('s6_svscan'),
    )
    # This service is created in the "downed" state and has a restart
    # limit of zero.
    system_services.append({
        'name': 'start_container',
        'proid': 'root',
        'restart': _restart(0),
        'command': start_command,
        'environ': [],
        'config': None,
        'downed': True,
        'trace': False,
    })

    # Create the services monitor service
    monitor_command = (
        'exec {tm} sproc'
        ' --cell {cell}'
        ' monitor services'
        ' --approot {tm_root}'
        ' -c {container_dir}'
        ' -s {services_opts}'
    ).format(
        tm=dist.TREADMILL_BIN,
        cell=manifest['cell'],
        tm_root=tm_env.root,
        container_dir=container_svcdir.directory,
        # This adds all services beside monitor itself: system services
        # first (under 'sys'), then the app services (under 'services').
        services_opts=' -s'.join([
            os.path.join(container_data_dir, subdir, svc['name'])
            for subdir, key in (
                ('sys', 'system_services'),
                ('services', 'services'),
            )
            for svc in manifest[key]
        ]),
    )
    system_services.append({
        'name': 'monitor',
        'proid': 'root',
        'restart': None,  # Monitor should not be monitored
        'command': monitor_command,
        'environ': [_host_ticket(), _aliases_path()],
        'config': None,
        'downed': False,
        'trace': False,
    })
def run(tm_env, container_dir, manifest, watchdog, terminated):
    """Create the container environment and exec the root supervisor.

    Applies the cgroup limits, waits for the network device, records the
    network details in the manifest, allocates the network ports, saves the
    app state, optionally unshares the network, creates the root directory
    and unpacks the image into it.  The watchdog is then removed and,
    unless termination was requested, the app hooks are configured and the
    root supervisor is exec'ed.

    :param tm_env:
        Treadmill application environment.
    :param container_dir:
        Full path to the container directory.
    :param manifest:
        Application manifest (``dict``); updated in place with ``network``
        and ``vip`` entries.
    :param watchdog:
        App run watchdog; removed once setup is complete.
    :param terminated:
        Flag that is truthy when a termination signal was received.
    """
    _LOGGER.info('Running %r', container_dir)

    # Apply memory limits first thing, so that app_run does not consume memory
    # from treadmill/core.
    _apply_cgroup_limits(tm_env, container_dir, manifest)

    img_impl = image.get_image(tm_env, manifest)

    unique_name = appcfg.manifest_unique_name(manifest)

    # First wait for the network device to be ready
    network_dir = os.path.join(container_dir, 'network')
    network_info = tm_env.svc_network.make_client(network_dir).wait(
        unique_name
    )

    manifest['network'] = network_info
    # FIXME: backward compatibility for TM 2.0. Remove in 3.0
    manifest['vip'] = {
        'ip0': network_info['gateway'],
        'ip1': network_info['vip'],
    }

    # Allocate dynamic ports
    #
    # Ports are taken from ephemeral range, by binding to socket to port 0.
    #
    # Sockets are then put into global list, so that they are not closed
    # at gc time, and address remains in use for the lifetime of the
    # supervisor.
    port_sockets = runtime.allocate_network_ports(
        network_info['external_ip'], manifest
    )

    app = runtime.save_app(manifest, container_dir)

    if not app.shared_network:
        _unshare_network(tm_env, app)

    # Create root directory structure (chroot base).
    # container_dir/<subdir>
    root_dir = os.path.join(container_dir, 'root')

    # Create and format the container root volume.
    _create_root_dir(tm_env, container_dir, root_dir, app)

    # NOTE: below here, MOUNT namespace is private

    # Unpack the image to the root directory.
    img_impl.unpack(container_dir, root_dir, app)

    # If network is shared, close sockets before starting the
    # supervisor, as these ports will be used by container apps.
    if app.shared_network:
        for port_socket in port_sockets:
            port_socket.close()

    watchdog.remove()

    # Termination was requested during setup: do not start the supervisor.
    if terminated:
        return

    # hook container
    apphook.configure(tm_env, app)
    supervisor.exec_root_supervisor(os.path.join(container_dir, 'sys'))