def test__unshare_network_simple(self):
    """Verify the basic unshare-network sequence for a shared-IP container.
    """
    # Access protected module _create_supervision_tree
    # pylint: disable=W0212
    manifest = {
        'name': 'proid.test#0',
        'uniqueid': 'ID1234',
        'environment': 'dev',
        'network': {
            'veth': 'id1234.0',
            'vip': '192.168.1.1',
            'gateway': '192.168.254.254',
        },
        'host_ip': '172.31.81.67',
        'shared_ip': True,
        'ephemeral_ports': [],
        'endpoints': [
            {
                'real_port': '5007',
                'proto': 'tcp',
                'port': '22',
                'type': 'infra',
            },
            {
                'real_port': '5013',
                'proto': 'udp',
                'port': '12345',
            },
        ],
    }
    app = utils.to_obj(manifest)
    unique_name = appmgr.app_unique_name(app)

    appmgr.run._unshare_network(self.app_env, app)

    # The 'infra' endpoint must be whitelisted in the infra services set.
    treadmill.iptables.add_ip_set.assert_has_calls([
        mock.call(treadmill.iptables.SET_INFRA_SVC, '192.168.1.1,tcp:22'),
    ])
    # Each endpoint gets a DNAT rule mapping host_ip:real_port to the
    # container's vip:port.
    expected_rules = [
        mock.call(
            rule=firewall.DNATRule('tcp', '172.31.81.67', '5007',
                                   '192.168.1.1', '22'),
            owner=unique_name,
        ),
        mock.call(
            rule=firewall.DNATRule('udp', '172.31.81.67', '5013',
                                   '192.168.1.1', '12345'),
            owner=unique_name,
        ),
    ]
    self.app_env.rules.create_rule.assert_has_calls(expected_rules,
                                                    any_order=True)
    # shared_ip containers pass the host IP as the external address.
    treadmill.newnet.create_newnet.assert_called_with(
        'id1234.0',
        '192.168.1.1',
        '192.168.254.254',
        '172.31.81.67',
    )
def test__create_root_dir(self):
    """Test creation on the container root directory."""
    # Access protected module _create_root_dir
    # pylint: disable=W0212
    app = utils.to_obj(
        {
            'proid': 'myproid',
            'name': 'myproid.test#0',
            'uniqueid': 'ID1234',
            'environment': 'dev',
            'disk': '100G',
        }
    )
    app_unique_name = appmgr.app_unique_name(app)
    container_dir = os.path.join(self.root, 'apps', app_unique_name)
    mock_ld_client = self.app_env.svc_localdisk.make_client.return_value
    localdisk = {
        'block_dev': '/dev/foo',
    }
    mock_ld_client.wait.return_value = localdisk

    treadmill.appmgr.run._create_root_dir(self.app_env,
                                          container_dir,
                                          '/some/root_dir',
                                          app)

    treadmill.fs.chroot_init.assert_called_with('/some/root_dir')
    treadmill.fs.create_filesystem.assert_called_with('/dev/foo')
    # BUGFIX: this was a bare `treadmill.fs.mount_filesystem(...)` call,
    # which only invokes the mock and asserts nothing.
    treadmill.fs.mount_filesystem.assert_called_with('/dev/foo',
                                                     '/some/root_dir')
    treadmill.fs.make_rootfs.assert_called_with('/some/root_dir', 'myproid')
    treadmill.fs.configure_plugins.assert_called_with(
        self.root, '/some/root_dir', app
    )
    treadmill.fs.mkdir_safe.assert_called_with('/some/root_dir/.etc')
    treadmill.fs.mount_bind.assert_called_with(
        '/some/root_dir', '/treadmill', '/test_treadmill',
    )
    # BUGFIX: `assert_call_with` is not a Mock assertion method; attribute
    # access on a mock silently creates a child mock, so the original check
    # always "passed" regardless of the arguments.
    # NOTE(review): the expected destination '.etc/resolve.conf' (vs. the
    # source 'resolv.conf') looks like a typo -- confirm the spelling
    # against _create_root_dir's implementation.
    shutil.copyfile.assert_called_with(
        os.path.join(self.app_env.root, 'etc/resolv.conf'),
        '/some/root_dir/.etc/resolve.conf'
    )
    treadmill.subproc.check_call.assert_has_calls([
        mock.call(
            [
                'mount', '-n', '--bind',
                os.path.join(self.app_env.root, 'etc/resolv.conf'),
                '/etc/resolv.conf'
            ]
        )
    ])
def test__unshare_network_complex(self):
    """Test unshare network advanced sequence (ephemeral/passthrough)."""
    # Access protected module _create_supervision_tree
    # pylint: disable=W0212
    app = utils.to_obj(
        {
            'name': 'myproid.test#0',
            'environment': 'dev',
            'uniqueid': 'ID1234',
            'network': {
                'veth': 'id1234.0',
                'vip': '192.168.0.2',
                'gateway': '192.168.254.254'
            },
            'shared_ip': False,
            'endpoints': [
                {
                    'name': 'ssh',
                    'port': 54321,
                    # CONSISTENCY FIX: endpoints carry a 'proto' attribute
                    # (see the simple unshare test and the DNAT cleanup
                    # code); without it, endpoint.proto would not resolve.
                    'proto': 'tcp',
                    'real_port': 54321,
                    'type': 'infra',
                }
            ],
            'ephemeral_ports': [
                10000,
                10001,
                10002,
            ],
            'passthrough': [
                'xxx',
                'yyy',
                'zzz',
            ],
        }
    )
    app_unique_name = appmgr.app_unique_name(app)
    hosts_to_ip = {
        'xxx': '4.4.4.4',
        'yyy': '5.5.5.5',
        'zzz': '5.5.5.5',
    }
    socket.gethostbyname.side_effect = lambda h: hosts_to_ip[h]
    self.app_env.rules.get_rules.return_value = set()

    treadmill.appmgr.run._unshare_network(
        self.app_env,
        app
    )

    # CONSISTENCY FIX: DNATRule takes the protocol first (the sibling
    # test test__unshare_network_simple and _cleanup_network both pass
    # it); the original expectations omitted it, which would have bound
    # '172.31.81.67' to `proto`.  Ephemeral ports are TCP (they are
    # whitelisted as 'tcp' in the infra set below).
    self.app_env.rules.create_rule.assert_has_calls([
        mock.call(rule=firewall.DNATRule('tcp', '172.31.81.67', 54321,
                                         '192.168.0.2', 54321),
                  owner=app_unique_name),
        mock.call(rule=firewall.DNATRule('tcp', '172.31.81.67', 10000,
                                         '192.168.0.2', 10000),
                  owner=app_unique_name),
        mock.call(rule=firewall.DNATRule('tcp', '172.31.81.67', 10001,
                                         '192.168.0.2', 10001),
                  owner=app_unique_name),
        mock.call(rule=firewall.DNATRule('tcp', '172.31.81.67', 10002,
                                         '192.168.0.2', 10002),
                  owner=app_unique_name),
        mock.call(rule=firewall.PassThroughRule('4.4.4.4', '192.168.0.2'),
                  owner=app_unique_name),
        mock.call(rule=firewall.PassThroughRule('5.5.5.5', '192.168.0.2'),
                  owner=app_unique_name),
    ])

    # Check that infra services + ephemeral ports are in the same set.
    treadmill.iptables.add_ip_set.assert_has_calls([
        mock.call(treadmill.iptables.SET_INFRA_SVC,
                  '192.168.0.2,tcp:54321'),
        mock.call(treadmill.iptables.SET_INFRA_SVC,
                  '192.168.0.2,tcp:10000'),
        mock.call(treadmill.iptables.SET_INFRA_SVC,
                  '192.168.0.2,tcp:10001'),
        mock.call(treadmill.iptables.SET_INFRA_SVC,
                  '192.168.0.2,tcp:10002'),
    ])
    # Non-shared-IP containers get no external address.
    treadmill.newnet.create_newnet.assert_called_with(
        'id1234.0',
        '192.168.0.2',
        '192.168.254.254',
        None,
    )
def configure(tm_env, event):
    """Creates directory necessary for starting the application.

    This operation is idem-potent (it can be repeated).

    The directory layout is::

        - (treadmill root)
          - apps
            - (app unique name)
              - app.yml
              - run
              - finish

    The 'run' script is responsible for creating container environment
    and starting svscan inside the container.

    The 'finish' script is invoked when container terminates and will
    deallocate any resources (NAT rules, etc) that were allocated for the
    container.

    :param tm_env: Treadmill application environment (provides service
        clients, directory roots and the firewall rule manager).
    :param event: Path to the scheduler event file describing the app;
        it is loaded as the app manifest and archived as ``manifest.yml``.
    :returns: Path of the created container directory, or ``None`` if the
        event file disappeared before it could be loaded.
    :raises KeyError: If the app's proid is not present in the passwd
        database (re-raised after logging).
    """
    # R0915: Need to refactor long function into smaller pieces.
    #
    # pylint: disable=R0915

    # Load the app from the event
    try:
        manifest_data = app_manifest.load(tm_env, event)
    except IOError:
        # File is gone. Nothing to do.
        _LOGGER.exception("No event to load: %r", event)
        return

    # Freeze the app data into a namedtuple object
    app = utils.to_obj(manifest_data)

    # Check the identity we are going to run as. It needs to exists on the
    # host or we will fail later on as we try to seteuid.
    try:
        pwd.getpwnam(app.proid)
    except KeyError:
        _LOGGER.exception('Unable to find proid %r in passwd database.',
                          app.proid)
        raise

    # Generate a unique name for the app
    uniq_name = appmgr.app_unique_name(app)

    # Create the app's running directory
    container_dir = os.path.join(tm_env.apps_dir, uniq_name)

    # We assume it is a 'resume' if the container directory already exists.
    is_resume = False
    try:
        os.makedirs(container_dir)
    except OSError as err:
        if err.errno == errno.EEXIST:
            _LOGGER.info('Resuming container %r', uniq_name)
            is_resume = True
        else:
            raise

    # Copy the event as 'manifest.yml' in the container dir
    shutil.copyfile(
        event,
        os.path.join(container_dir, 'manifest.yml')
    )

    # Setup the service clients
    cgroup_client = tm_env.svc_cgroup.make_client(
        os.path.join(container_dir, 'cgroups')
    )
    localdisk_client = tm_env.svc_localdisk.make_client(
        os.path.join(container_dir, 'localdisk')
    )
    network_client = tm_env.svc_network.make_client(
        os.path.join(container_dir, 'network')
    )

    # Store the app int the container_dir
    app_yml = os.path.join(container_dir, _APP_YML)
    with open(app_yml, 'w') as f:
        yaml.dump(manifest_data, stream=f)

    # Generate resources requests

    # Cgroup
    cgroup_req = {
        'memory': app.memory,
        'cpu': app.cpu,
    }
    # Local Disk
    localdisk_req = {
        'size': app.disk,
    }
    # Network
    network_req = {
        'environment': app.environment,
    }

    # On first configure, allocate resources; on resume, refresh the
    # existing requests instead.
    if not is_resume:
        cgroup_client.create(uniq_name, cgroup_req)
        localdisk_client.create(uniq_name, localdisk_req)
    else:
        cgroup_client.update(uniq_name, cgroup_req)
        localdisk_client.update(uniq_name, localdisk_req)

    # Network resource is only requested for containers with their own
    # (non-shared) network.
    if not app.shared_network:
        if not is_resume:
            network_client.create(uniq_name, network_req)
        else:
            network_client.update(uniq_name, network_req)

    # Mark the container as defaulting to down state
    utils.touch(os.path.join(container_dir, 'down'))

    # Generate the supervisor's run script
    app_run_cmd = ' '.join([
        os.path.join(treadmill.TREADMILL, 'bin', 'treadmill'),
        'sproc', 'run', container_dir
    ])

    run_out_file = os.path.join(container_dir, 'run.out')

    utils.create_script(os.path.join(container_dir, 'run'),
                        'supervisor.run_no_log',
                        log_out=run_out_file,
                        cmd=app_run_cmd)

    _init_log_file(run_out_file,
                   os.path.join(tm_env.apps_dir, "%s.run.out" % uniq_name))

    # Unique name for the link, based on creation time.
    cleanup_link = os.path.join(tm_env.cleanup_dir, uniq_name)
    # The finish script only links the container dir into the cleanup
    # directory; actual teardown is driven from there.
    finish_cmd = '/bin/ln -snvf %s %s' % (container_dir, cleanup_link)

    utils.create_script(os.path.join(container_dir, 'finish'),
                        'supervisor.finish',
                        service=app.name, proid=None,
                        cmds=[finish_cmd])

    # Publish the 'configured' trace event for this instance.
    appevents.post(
        tm_env.app_events_dir,
        events.ConfiguredTraceEvent(
            instanceid=app.name,
            uniqueid=app.uniqueid
        )
    )
    return container_dir
def _cleanup_network(tm_env, app, network_client):
    """Undo the network configuration of a terminated container.

    Unlinks the container's firewall rules (passthrough and per-endpoint
    DNAT), removes 'infra' endpoints from the infra-services ipset, cleans
    up ephemeral port rules, flushes conntrack entries for the container's
    VIP and finally releases the network resource itself.
    """
    uniq_name = appmgr.app_unique_name(app)

    try:
        network_info = network_client.get(uniq_name)
    except services.ResourceServiceError:
        _LOGGER.warning('network never allocated')
        return

    if network_info is None:
        _LOGGER.info('Network resource already freed')
        return

    vip = network_info['vip']

    # Unconfigure passthrough
    if hasattr(app, 'passthrough'):
        _LOGGER.info('Deleting passthrough for: %r', app.passthrough)
        # Resolve all the hosts
        # FIXME: There is no guarantie the hosts will resolve to
        #        the same IPs as they did during creation.
        for passthrough_ip in {socket.gethostbyname(host)
                               for host in app.passthrough}:
            tm_env.rules.unlink_rule(
                rule=firewall.PassThroughRule(src_ip=passthrough_ip,
                                              dst_ip=vip),
                owner=uniq_name,
            )

    for endpoint in app.endpoints:
        tm_env.rules.unlink_rule(
            rule=firewall.DNATRule(proto=endpoint.proto,
                                   orig_ip=app.host_ip,
                                   orig_port=endpoint.real_port,
                                   new_ip=vip,
                                   new_port=endpoint.port),
            owner=uniq_name,
        )
        # See if this was an "infra" endpoint and if so remove it
        # from the whitelist set.
        if getattr(endpoint, 'type', None) == 'infra':
            _LOGGER.debug('removing %s:%s from infra services set',
                          vip, endpoint.port)
            iptables.rm_ip_set(
                iptables.SET_INFRA_SVC,
                '{ip},{proto}:{port}'.format(
                    ip=vip,
                    proto=endpoint.proto,
                    port=endpoint.port,
                ))

    # Release ephemeral port rules for both protocols.
    for proto in ('tcp', 'udp'):
        _cleanup_ports(tm_env, uniq_name, vip,
                       getattr(app.ephemeral_ports, proto), proto)

    # Terminate any entries in the conntrack table
    iptables.flush_conntrack_table(vip)

    # Cleanup network resources
    network_client.delete(uniq_name)
def _cleanup(tm_env, zkclient, container_dir, app):
    """Cleanup a container that actually ran.

    Kills remaining processes, archives the container root filesystem and
    metrics, releases localdisk/network/cgroup resources, stores local
    logs, and finally ships the container archive.  Each step is
    best-effort: failures are logged and the remaining steps still run.

    :param tm_env: Treadmill application environment (service clients,
        rules, directory roots).
    :param zkclient: Zookeeper client used to send the container archive.
    :param container_dir: The container's directory under the apps root.
    :param app: Frozen app manifest object.
    """
    # Too many branches.
    #
    # pylint: disable=R0912
    rootdir = os.path.join(container_dir, 'root')

    # Generate a unique name for the app
    unique_name = appmgr.app_unique_name(app)

    # Create service clients
    cgroup_client = tm_env.svc_cgroup.make_client(
        os.path.join(container_dir, 'cgroups'))
    localdisk_client = tm_env.svc_localdisk.make_client(
        os.path.join(container_dir, 'localdisk'))
    network_client = tm_env.svc_network.make_client(
        os.path.join(container_dir, 'network'))

    # Make sure all processes are killed
    # FIXME(boysson): Should we use `kill_apps_in_cgroup` instead?
    _kill_apps_by_root(rootdir)

    # Setup the archive filename that will hold this container's data
    filetime = utils.datetime_utcnow().strftime('%Y%m%d_%H%M%S%f')
    archive_filename = os.path.join(
        container_dir,
        '{instance_name}_{hostname}_{timestamp}.tar'.format(
            instance_name=appmgr.appname_task_id(app.name),
            hostname=sysinfo.hostname(),
            timestamp=filetime))

    # Tar up container root filesystem if archive list is in manifest
    try:
        localdisk = localdisk_client.get(unique_name)
        fs.archive_filesystem(localdisk['block_dev'], rootdir,
                              archive_filename, app.archive)
    except services.ResourceServiceError:
        _LOGGER.warning('localdisk never allocated')
    except subprocess.CalledProcessError:
        _LOGGER.exception('Unable to archive root device of %r',
                          unique_name)
    # Deliberate catch-all: archiving must never abort the cleanup.
    except:  # pylint: disable=W0702
        _LOGGER.exception('Unknown exception while archiving %r',
                          unique_name)

    # Destroy the volume
    try:
        localdisk = localdisk_client.delete(unique_name)
    except (IOError, OSError) as err:
        # Already gone - nothing to destroy.
        if err.errno == errno.ENOENT:
            pass
        else:
            raise

    if not app.shared_network:
        _cleanup_network(tm_env, app, network_client)

    # Add metrics to archive
    rrd_file = os.path.join(
        tm_env.metrics_dir, 'apps',
        '{name}-{instanceid}-{uniqueid}.rrd'.format(
            name=app.app,
            instanceid=app.task,
            uniqueid=app.uniqueid,
        ))
    rrdutils.flush_noexc(rrd_file)
    _copy_metrics(rrd_file, container_dir)

    # Cleanup our cgroup resources
    try:
        cgroup_client.delete(unique_name)
    except (IOError, OSError) as err:
        # Already gone - nothing to delete.
        if err.errno == errno.ENOENT:
            pass
        else:
            raise

    # Best-effort: log storage failures must not block archiving below.
    try:
        _archive_logs(tm_env, container_dir)
    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Unexpected exception storing local logs.')

    # Append or create the tarball with folders outside of container
    # Compress and send the tarball to HCP
    try:
        archive_filename = fs.tar(sources=container_dir,
                                  target=archive_filename,
                                  compression='gzip').name
        _send_container_archive(zkclient, app, archive_filename)
    # Deliberate catch-all: shipping the archive is best-effort.
    except:  # pylint: disable=W0702
        _LOGGER.exception("Failed to update archive")